Annotated source code of BFAST Monitor (bfastmonitor)

# The function 'bfastmonitor' from the R-package BFAST
function (data, start, formula = response ~ trend + harmon, order = 3, 
    lag = NULL, slag = NULL, history = c("ROC", "BP", "all"), 
    type = "OLS-MOSUM", h = 0.25, end = 10, level = 0.05, hpc = "none", 
    verbose = FALSE, plot = FALSE) 
{
    # Replicates the level value two times.
    level <- rep(level, length.out = 2)
    
    # If the time series (data) is not a ts object it is coerced to such.
    if (!is.ts(data)) 
        data <- as.ts(data)
    
    # Returns the number of samples per unit time.
    freq <- frequency(data)
        
    # 'time2num' is a function to convert parameter x to a numeric time
    # stamp when the start parameter of bfastmonitor() is passed as pair of
    # period / cycle.  For a time series of frequency f, time n+i/f is
    # presented as c(n, i+1). For further understanding execute ?start
    # and see the Details section.
    
    # L is used as a suffix to qualify any number with the intent of making
    # it an explicit integer. It is useed to get the code to run faster and
    # consume less memory. A double ("numeric") vector uses 8 bytes per
    # element. An integer vector uses only 4 bytes per element.
    time2num <- function(x) if (length(x) > 1L) 
        x[1L] + (x[2L] - 1)/freq
    else x
    
    # Initialize the starting date of the monitoring period.
    start <- time2num(start)
    
    # Dataframe for subsequent regression modeling.
    data_tspp <- bfastpp(data, order = order, lag = lag, slag = slag)
    
    # Building a subset from data_tspp containing only the data until the
    # beginning of the monitoring period (the data of the history period).
    history_tspp <- subset(data_tspp, time < start)
    
    #############################################################
    ### IDENTIFY THE START TIME OF THE STABLE HISTORY PERIOD ####
    #############################################################
    
    # If the passed history parameter is null, extract the time when the
    # first observation was taken.
    if (is.null(history)) {
          # Compare to the implementation of history parameter "all".
          history <- start(history_tspp$response)
    }
    
    # Executed in case the passed history parameter is a character string.
    else if (all(is.character(history))) {
        # Matches the perhaps abbreviated passed history parameter against
        # possible parameter values that are obtained from the the signature
        # of the function from which match.arg is called (-> bfastmonitor()). 
        history <- match.arg(history)
        
        # Switch evaluates the history parameter and accordingly executes
        # one of the further functions. The returned result is the start time
        # of the stable history period and is stored back to the history variable.
        history <- switch(history,
            # The full temporal range of the history period is used as the stable
            # history period. The time the first observation was taken is extracted.
            all = start(history_tspp$response),
            # See annotated 'history_roc' function below.
            ROC = history_roc(formula, data = history_tspp, level = level[2]),
            # See annotated 'history_break' function below.
            BP = history_break(formula, data = history_tspp, hpc = hpc)
        )
    }
    
    # Executed in case the history parameter was passed as a function.
    # This allows the user to pass an own data-driven process that detects
    # a suitable start time for the stable history period.
    else if (all(is.function(history))) {
        # history(formula, data) computes a numeric start date
        history <- history(formula, data = history_tspp)
    }
    
    # Converting the period-cycle notation of the start time of the stable
    # history to a numeric time stamp.
    history <- time2num(history)
    
    ###########################################
    ### MODELING THE STABLE HISTORY PERIOD ####
    ###########################################
    
    # Selecting the data of the stable history period.
    history_tspp <- subset(history_tspp, time >= history)
    
    # Prints information about the monitoring during computation
    # when 'verbose' variable is equals TRUE.
    if (verbose) {
        cat("\nBFAST monitoring\n\n1. History period\n")
        cat(sprintf("Stable period selected: %i(%i)--%i(%i)\n", 
            start(history_tspp$response)[1], start(history_tspp$response)[2], 
            end(history_tspp$response)[1], end(history_tspp$response)[2]))
        cat(sprintf("Length (in years): %f\n", NROW(history_tspp)/freq))
    }
    
    # test_tssp holds the data of the stable history period.
    test_tspp <- history_tspp
    
    # test_mefp is the seasonal model for the stable history period.
    test_mefp <- mefp(formula, data = test_tspp, type = type, 
        period = end, h = h, alpha = level[1])
    
    # test_lm ist the linear trend model for the stable history period.
    test_lm <- lm(formula, data = test_tspp)
    
    # Check for a sufficient length of the stable history period.
    if (floor(h * NROW(test_tspp)) <= 1 | NROW(test_tspp) <= 
        length(coef(test_lm))) {
        ok <- FALSE
        warning("too few observations in selected history period")
    }
    else {
        ok <- TRUE
    }
    
    # Print coeffiecients of the linear model if 'verbose' is equals TRUE.
    if (verbose) {
        cat("Model fit:\n")
        print(coef(test_lm))
    }
    
    #######################################################
    ## ANALYZING THE MONITORING PERIOD FOR ABRUPT CHANGE ##
    ####################################################### 
    
    # Selecting the monitoring period.
    test_tspp <- subset(data_tspp, time >= history)
    
    # If the stable history period has a sufficient length the
    # monitoring period is monitored for abrupt change and if any
    # breakpoint is detected the time of change is extracted.
    if (ok) {
        # Monitor the monitoring period for abrupt change.
        test_mon <- monitor(test_mefp, data = test_tspp, verbose = FALSE)
        
        # If breakpoint was found its time is extracted
        tbp <- if (is.na(test_mon$breakpoint)) 
            NA
        else test_tspp$time[test_mon$breakpoint]
        
        # Print information on the monitoring if 'verbose' is equals TRUE.
        if (verbose) {
            cat("\n\n2. Monitoring period\n")
            cat(sprintf("Monitoring starts at: %i(%i)\n", floor(start), 
                round((start - floor(start)) * freq) + 1))
            if (is.na(tbp)) {
                cat("Break detected at: -- (no break)\n\n")
            }
            else {
                cat(sprintf("Break detected at: %i(%i)\n\n", 
                  floor(tbp), round((tbp - floor(tbp)) * freq) + 
                    1))
            }
        }
    }
    
    # If the stable history period has NO sufficient length the two
    # variables are resetted.
    else {
        test_mon <- NA
        tbp <- NA
    }
    
    # Calculate the change magnitude of bfastmonitor: the magnitude 
    # is the median of the difference between the data and the model
    # prediction in the monitoring period.
    if (ok) {
        test_tspp$prediction <- predict(test_lm, newdata = test_tspp)
        new_data <- subset(test_tspp, time >= start)
        magnitude <- median(new_data$response - new_data$prediction, 
            na.rm = TRUE)
    }
    
    else {
        test_tspp$prediction <- NA
        magnitude <- NA
    }
    
    # The return values of bfastmonitor are selected.
    rval <- list(data = data, tspp = test_tspp, model = test_lm, 
        mefp = test_mon, history = c(head(history_tspp$time, 
            1), tail(history_tspp$time, 1)), monitor = c(start, 
            tail(test_tspp$time, 1)), breakpoint = tbp, magnitude = magnitude)
    
    class(rval) <- "bfastmonitor"
    
    if (plot) 
        plot(rval)
    return(rval)
}

########################################
## Reversely Ordered CUSUM (ROC) test ##
########################################

# A technique to verify whether or not the historical period is stable or not
# by reversely order sample and perform a recursive CUSUM test.

history_roc <- function(formula, data, level = 0.05) {
  # Number of rows in the data (historical period).
  n <- nrow(data)
  
  # Storing the reverse ordering of the data in data_rev.
  data_rev <- data[n:1,]
  
  # Making a time series of the response variables.
  data_rev$response <- ts(data_rev$response)
  
  # efp will return a one-dimensional empirical fluctuation process of CUmulative
  # SUms of residuals that are based on recursive residuals. The CUSUM algorithm
  # fits a linear model to a time series. For each observation the standardized
  # differences between the measurement and the linear model is calculated and
  # cumulatively summed up. If the resulting process is in control, the cumulative
  # sum of residuals fluctuate around zero. If the process is out of control, the sum
  # of cumulated residuals will progressively depart from zero. As a mean to decide
  # whether and when the process is out of control the Standard Brownian Motion (Wiener
  # Process) is constructed as a limiting function. When the CUSUM function intersects
  # the limiting function a change is assumed, followed by the estimation of the change
  # time. ROC simply applies the CUSUM test in reverse order – beginning at the start of
  # the monitoring period and cumulatively summing the residuals going backward in time
  # until the CUSUM algorithm breaks. The time where it breaks is defined as a start time
  # for the stable history period.
  y_rcus <- efp(formula, data = data_rev, type = "Rec-CUSUM")

  # sctest() performs a structural change test. If the p value of the test testing that
  # there is a strucural change is lower than the significance level, the start time for
  # the history is initialized with the corresponding index. If no structural change is
  # detected, the start of the history period is set to the first index (all the data
  # until the beginning of the monitoring period is most likely stable/representative).
  y_start <- if(sctest(y_rcus)$p.value < level) {
    # Length of all measurements minus the minimum index of the set of measurements of
    # the CUSUM process that exeeds the threshold/boundary yielding the index where a
    # structural break was detected. By adding 1 to this index the start time is set to
    # one step behind the time the break was detected.
    length(y_rcus$process) - min(which(abs(y_rcus$process)[-1] > boundary(y_rcus)[-1]))
      + 1
  } else {
    1    
  }
  
  # Returns the start time of the stable history period.
  data$time[y_start]
}

##################################
## Bai & Perron last breakpoint ##
##################################

history_break <- function(formula, data, h = NULL, hpc = "none") {
  # Number of rows in the data (historical period).
  n <- nrow(data)
  
  # rule of thumb for minimal segment size
  if(is.null(h)) h <- 6 * NCOL(model.matrix(formula, data = data[0,]))

  # conduct breakpoints estimation
  bp <- breakpoints(formula, data = data, h = h, hpc = hpc)
  
  # Select the last detected breakpoint.
  y_start <- tail(breakpoints(bp)$breakpoints, 1)
  
  # If no breakpoint is detected the all of the history period
  # is selected as a stable history period. Otherwise the start
  # of the stable history period is set to on time step after the
  # last breakpoint was detected.
  y_start <- if(is.na(y_start)) 1 else y_start + 1
  data$time[y_start]
}

Annotated source code of BFAST Monitor (bfastmonitor)

Christopher Stephan

03/04/2015