Project Overview

This project applies time series forecasting to three datasets: ATM cash withdrawals, residential power usage, and hourly waterflow. Using the fpp3 framework in R, I fit ETS and ARIMA models to each dataset, compare forecasts, and export the final predictions to Excel.

Part A – ATM Cash Forecast

# Read and prepare ATM dataset
atm <- read_excel("ATM624Data.xlsx") %>%
  transmute(date = as.Date(DATE), atm = as.character(ATM), cash = as.numeric(Cash)) %>%
  arrange(atm, date) %>%
  as_tsibble(index = date, key = atm)

# Visualize cash withdrawals
autoplot(atm, cash) + labs(title = "ATM Daily Cash Withdrawals", y = "Cash (Hundreds $)")

# Split into training and testing
n <- nrow(atm)
train <- atm %>% slice_head(n = floor(0.8 * n))

# Fit ETS and ARIMA models
fit <- train %>% model(ETS = ETS(cash), ARIMA = ARIMA(cash))

# Forecast and plot
fc <- fit %>% forecast(h = n - nrow(train))
autoplot(fc, train) + labs(title = "ATM Cash Forecast", y = "Cash (Hundreds $)")

# Export forecasts
write_xlsx(as_tibble(fc), "ATM_Forecast_Sabina.xlsx")

Part B – Residential Power Forecast

# Read and clean power usage dataset
power <- read_excel("ResidentialCustomerForecastLoad-624.xlsx") %>%
  transmute(month = yearmonth(str_replace(`YYYY-MMM`, "-", " ")), 
            kwh = as.numeric(KWH)) %>%
  arrange(month) %>%
  as_tsibble(index = month)

# Visualize power consumption trend
autoplot(power, kwh) + labs(title = "Residential Power Usage", y = "KWH")

# Train/test split
n <- nrow(power)
train <- power %>% slice_head(n = floor(0.8 * n))

# Fit ETS and ARIMA models
fit <- train %>% model(ETS = ETS(kwh), ARIMA = ARIMA(kwh))

# Forecast and plot
fc <- fit %>% forecast(h = n - nrow(train))
autoplot(fc, train) + labs(title = "Residential Power Forecast", y = "KWH")

# Export results
write_xlsx(as_tibble(fc), "Power_Forecast_Sabina.xlsx")

Part C – Waterflow Forecast

# Function to prepare hourly data and fix missing timestamps
make_hourly_series <- function(file_path) {
  data <- read_excel(file_path) %>%
    transmute(time = as.POSIXct(`Date Time`),
              flow = as.numeric(WaterFlow)) %>%
    filter(!is.na(time), !is.na(flow)) %>%
    mutate(hour = floor_date(time, "hour")) %>%
    group_by(hour) %>%
    summarise(flow = mean(flow, na.rm = TRUE), .groups = "drop") %>%
    arrange(hour)

  # Fill any gaps and create continuous hourly data
  full_hours <- tibble(hour = seq.POSIXt(from = min(data$hour),
                                         to = max(data$hour),
                                         by = "hour"))
  full <- full_hours %>%
    left_join(data, by = "hour") %>%
    mutate(flow = zoo::na.approx(flow, na.rm = FALSE)) %>%
    mutate(flow = zoo::na.locf(flow, na.rm = FALSE)) %>%
    mutate(flow = zoo::na.locf(flow, fromLast = TRUE)) %>%
    mutate(t = row_number()) %>%
    arrange(t)

  # Convert to tsibble and assign regular interval
  ts <- tsibble::as_tsibble(full, index = t)
  attr(ts, "interval") <- tsibble::new_interval(1)
  class(ts) <- c("tbl_ts", class(ts))
  ts
}

# Read both pipe datasets
pipe1 <- make_hourly_series("Waterflow_Pipe1.xlsx")
pipe2 <- make_hourly_series("Waterflow_Pipe2.xlsx")

# Fit ETS and ARIMA models
fit1 <- pipe1 %>% model(ETS = ETS(flow), ARIMA = ARIMA(flow))
fit2 <- pipe2 %>% model(ETS = ETS(flow), ARIMA = ARIMA(flow))

# Forecast 168 hours (7 days)
fc1 <- forecast(fit1, h = 168)
fc2 <- forecast(fit2, h = 168)

# Plot the results
autoplot(fc1, pipe1) + labs(title = "Pipe 1 – 168 Hour Forecast", y = "Flow")

autoplot(fc2, pipe2) + labs(title = "Pipe 2 – 168 Hour Forecast", y = "Flow")

# Export forecast results
write_xlsx(list(Pipe1 = as_tibble(fc1),
                Pipe2 = as_tibble(fc2)),
           "Waterflow_Forecast_Sabina.xlsx")

Summary

Across all three datasets, ETS and ARIMA models were used to capture seasonal and trend components. Both models performed well, with ETS smoothing out patterns effectively and ARIMA adjusting to variations. Forecast outputs were visualized and exported for review.