# The function 'bfastmonitor' from the R-package BFAST
function (data, start, formula = response ~ trend + harmon, order = 3,
lag = NULL, slag = NULL, history = c("ROC", "BP", "all"),
type = "OLS-MOSUM", h = 0.25, end = 10, level = 0.05, hpc = "none",
verbose = FALSE, plot = FALSE)
{
# Replicates the level value two times.
level <- rep(level, length.out = 2)
# If the time series (data) is not a ts object it is coerced to such.
if (!is.ts(data))
data <- as.ts(data)
# Returns the number of samples per unit time.
freq <- frequency(data)
# 'time2num' is a function to convert parameter x to a numeric time
# stamp when the start parameter of bfastmonitor() is passed as pair of
# period / cycle. For a time series of frequency f, time n+i/f is
# presented as c(n, i+1). For further understanding execute ?start
# and see the Details section.
# L is used as a suffix to qualify any number with the intent of making
# it an explicit integer. It is useed to get the code to run faster and
# consume less memory. A double ("numeric") vector uses 8 bytes per
# element. An integer vector uses only 4 bytes per element.
time2num <- function(x) if (length(x) > 1L)
x[1L] + (x[2L] - 1)/freq
else x
# Initialize the starting date of the monitoring period.
start <- time2num(start)
# Dataframe for subsequent regression modeling.
data_tspp <- bfastpp(data, order = order, lag = lag, slag = slag)
# Building a subset from data_tspp containing only the data until the
# beginning of the monitoring period (the data of the history period).
history_tspp <- subset(data_tspp, time < start)
#############################################################
### IDENTIFY THE START TIME OF THE STABLE HISTORY PERIOD ####
#############################################################
# If the passed history parameter is null, extract the time when the
# first observation was taken.
if (is.null(history)) {
# Compare to the implementation of history parameter "all".
history <- start(history_tspp$response)
}
# Executed in case the passed history parameter is a character string.
else if (all(is.character(history))) {
# Matches the perhaps abbreviated passed history parameter against
# possible parameter values that are obtained from the the signature
# of the function from which match.arg is called (-> bfastmonitor()).
history <- match.arg(history)
# Switch evaluates the history parameter and accordingly executes
# one of the further functions. The returned result is the start time
# of the stable history period and is stored back to the history variable.
history <- switch(history,
# The full temporal range of the history period is used as the stable
# history period. The time the first observation was taken is extracted.
all = start(history_tspp$response),
# See annotated 'history_roc' function below.
ROC = history_roc(formula, data = history_tspp, level = level[2]),
# See annotated 'history_break' function below.
BP = history_break(formula, data = history_tspp, hpc = hpc)
)
}
# Executed in case the history parameter was passed as a function.
# This allows the user to pass an own data-driven process that detects
# a suitable start time for the stable history period.
else if (all(is.function(history))) {
# history(formula, data) computes a numeric start date
history <- history(formula, data = history_tspp)
}
# Converting the period-cycle notation of the start time of the stable
# history to a numeric time stamp.
history <- time2num(history)
###########################################
### MODELING THE STABLE HISTORY PERIOD ####
###########################################
# Selecting the data of the stable history period.
history_tspp <- subset(history_tspp, time >= history)
# Prints information about the monitoring during computation
# when 'verbose' variable is equals TRUE.
if (verbose) {
cat("\nBFAST monitoring\n\n1. History period\n")
cat(sprintf("Stable period selected: %i(%i)--%i(%i)\n",
start(history_tspp$response)[1], start(history_tspp$response)[2],
end(history_tspp$response)[1], end(history_tspp$response)[2]))
cat(sprintf("Length (in years): %f\n", NROW(history_tspp)/freq))
}
# test_tssp holds the data of the stable history period.
test_tspp <- history_tspp
# test_mefp is the seasonal model for the stable history period.
test_mefp <- mefp(formula, data = test_tspp, type = type,
period = end, h = h, alpha = level[1])
# test_lm ist the linear trend model for the stable history period.
test_lm <- lm(formula, data = test_tspp)
# Check for a sufficient length of the stable history period.
if (floor(h * NROW(test_tspp)) <= 1 | NROW(test_tspp) <=
length(coef(test_lm))) {
ok <- FALSE
warning("too few observations in selected history period")
}
else {
ok <- TRUE
}
# Print coeffiecients of the linear model if 'verbose' is equals TRUE.
if (verbose) {
cat("Model fit:\n")
print(coef(test_lm))
}
#######################################################
## ANALYZING THE MONITORING PERIOD FOR ABRUPT CHANGE ##
#######################################################
# Selecting the monitoring period.
test_tspp <- subset(data_tspp, time >= history)
# If the stable history period has a sufficient length the
# monitoring period is monitored for abrupt change and if any
# breakpoint is detected the time of change is extracted.
if (ok) {
# Monitor the monitoring period for abrupt change.
test_mon <- monitor(test_mefp, data = test_tspp, verbose = FALSE)
# If breakpoint was found its time is extracted
tbp <- if (is.na(test_mon$breakpoint))
NA
else test_tspp$time[test_mon$breakpoint]
# Print information on the monitoring if 'verbose' is equals TRUE.
if (verbose) {
cat("\n\n2. Monitoring period\n")
cat(sprintf("Monitoring starts at: %i(%i)\n", floor(start),
round((start - floor(start)) * freq) + 1))
if (is.na(tbp)) {
cat("Break detected at: -- (no break)\n\n")
}
else {
cat(sprintf("Break detected at: %i(%i)\n\n",
floor(tbp), round((tbp - floor(tbp)) * freq) +
1))
}
}
}
# If the stable history period has NO sufficient length the two
# variables are resetted.
else {
test_mon <- NA
tbp <- NA
}
# Calculate the change magnitude of bfastmonitor: the magnitude
# is the median of the difference between the data and the model
# prediction in the monitoring period.
if (ok) {
test_tspp$prediction <- predict(test_lm, newdata = test_tspp)
new_data <- subset(test_tspp, time >= start)
magnitude <- median(new_data$response - new_data$prediction,
na.rm = TRUE)
}
else {
test_tspp$prediction <- NA
magnitude <- NA
}
# The return values of bfastmonitor are selected.
rval <- list(data = data, tspp = test_tspp, model = test_lm,
mefp = test_mon, history = c(head(history_tspp$time,
1), tail(history_tspp$time, 1)), monitor = c(start,
tail(test_tspp$time, 1)), breakpoint = tbp, magnitude = magnitude)
class(rval) <- "bfastmonitor"
if (plot)
plot(rval)
return(rval)
}
########################################
## Reversely Ordered CUSUM (ROC) test ##
########################################
# A technique to verify whether or not the historical period is stable or not
# by reversely order sample and perform a recursive CUSUM test.
history_roc <- function(formula, data, level = 0.05) {
# Number of rows in the data (historical period).
n <- nrow(data)
# Storing the reverse ordering of the data in data_rev.
data_rev <- data[n:1,]
# Making a time series of the response variables.
data_rev$response <- ts(data_rev$response)
# efp will return a one-dimensional empirical fluctuation process of CUmulative
# SUms of residuals that are based on recursive residuals. The CUSUM algorithm
# fits a linear model to a time series. For each observation the standardized
# differences between the measurement and the linear model is calculated and
# cumulatively summed up. If the resulting process is in control, the cumulative
# sum of residuals fluctuate around zero. If the process is out of control, the sum
# of cumulated residuals will progressively depart from zero. As a mean to decide
# whether and when the process is out of control the Standard Brownian Motion (Wiener
# Process) is constructed as a limiting function. When the CUSUM function intersects
# the limiting function a change is assumed, followed by the estimation of the change
# time. ROC simply applies the CUSUM test in reverse order – beginning at the start of
# the monitoring period and cumulatively summing the residuals going backward in time
# until the CUSUM algorithm breaks. The time where it breaks is defined as a start time
# for the stable history period.
y_rcus <- efp(formula, data = data_rev, type = "Rec-CUSUM")
# sctest() performs a structural change test. If the p value of the test testing that
# there is a strucural change is lower than the significance level, the start time for
# the history is initialized with the corresponding index. If no structural change is
# detected, the start of the history period is set to the first index (all the data
# until the beginning of the monitoring period is most likely stable/representative).
y_start <- if(sctest(y_rcus)$p.value < level) {
# Length of all measurements minus the minimum index of the set of measurements of
# the CUSUM process that exeeds the threshold/boundary yielding the index where a
# structural break was detected. By adding 1 to this index the start time is set to
# one step behind the time the break was detected.
length(y_rcus$process) - min(which(abs(y_rcus$process)[-1] > boundary(y_rcus)[-1]))
+ 1
} else {
1
}
# Returns the start time of the stable history period.
data$time[y_start]
}
##################################
## Bai & Perron last breakpoint ##
##################################
history_break <- function(formula, data, h = NULL, hpc = "none") {
# Number of rows in the data (historical period).
n <- nrow(data)
# rule of thumb for minimal segment size
if(is.null(h)) h <- 6 * NCOL(model.matrix(formula, data = data[0,]))
# conduct breakpoints estimation
bp <- breakpoints(formula, data = data, h = h, hpc = hpc)
# Select the last detected breakpoint.
y_start <- tail(breakpoints(bp)$breakpoints, 1)
# If no breakpoint is detected the all of the history period
# is selected as a stable history period. Otherwise the start
# of the stable history period is set to on time step after the
# last breakpoint was detected.
y_start <- if(is.na(y_start)) 1 else y_start + 1
data$time[y_start]
}