Skip to contents

This is the well-known well-log dataset used in many change point papers, obtained from the Alan Turing Institute's GitHub repository and licensed under the MIT license.

Usage

well_log

Format

A time series of length 4050.

Source

<https://github.com/alan-turing-institute/TCPD>

Examples

# Fit a mean-change model to the well-log series and print its summary.
mean_fit <- fastcpd.mean(data = well_log, trim = 0.001)
summary(mean_fit)
#> 
#> Call:
#> fastcpd.mean(data = well_log, trim = 0.001)
#> 
#> Change points:
#> 7 19 65 356 445 717 792 1034 1070 1215 1368 1428 1526 1684 1866 2047 2409 2469 2531 2591 2775 3166 3314 3490 3533 3673 3744 3855 3886 3945 3963 4035 
#> 
#> Cost values:
#> 70.90268 131.7555 383.0472 2445.132 758.3834 2296.059 637.0301 1996.247 302.8254 1840.756 2267.745 605.6831 1054.037 1310.089 1530.8 1508.578 2990.751 491.6914 513.248 487.8511 1872.188 3796.243 1220.895 1461.687 396.427 1172.664 602.3776 918.5697 277.6224 593.5602 197.5627 611.3688 133.8662 
# Draw the fitted result.
plot(mean_fit)

# \donttest{
# Median-based change point detection with a user-supplied cost function.
# The whole example is skipped when the optional matrixStats package is
# not installed.
if (requireNamespace("matrixStats", quietly = TRUE)) {
  # Variance estimate of the series; its square root scales the cost below.
  sigma2 <- variance.median(well_log)

  # Custom cost: sum of absolute deviations from the column medians of
  # `data`, divided by sqrt(sigma2) and by 2.
  median_loss <- function(data) {
    sum(abs(data - matrixStats::colMedians(data))) / sqrt(sigma2) / 2
  }
  result <- fastcpd(
    formula = ~ x - 1,
    data = cbind.data.frame(x = well_log),
    cost = median_loss,
    trim = 0.002
  )
  summary(result)

  # First and last index of every segment implied by the change points.
  segment_starts <- c(1, result@cp_set)
  segment_ends <- c(result@cp_set - 1, length(well_log))

  # Deviation of each observation from its segment median, computed in a
  # single pass rather than by growing a vector inside a loop.
  residual <- unlist(
    Map(
      function(from, to) {
        segment <- well_log[from:to]
        segment - median(segment)
      },
      segment_starts,
      segment_ends
    ),
    use.names = FALSE
  )

  # Fill in the slots by hand before plotting.
  # NOTE(review): presumably this lets plot() treat the custom-cost fit
  # like a mean model -- confirm against fastcpd's plot method.
  result@residuals <- matrix(residual)
  result@family <- "mean"
  result@data <- data.frame(x = c(well_log))
  plot(result)
}
#> 
#> Call:
#> fastcpd(formula = ~x - 1, data = cbind.data.frame(x = well_log), 
#>     cost = median_loss, trim = 0.002)
#> 
#> Change points:
#> 19 577 1034 1070 1216 1361 1428 1526 1685 1866 2047 2409 2469 2531 2591 2775 3744 3855 3945 3963 

# }