The data set is the monthly mean average temperature for the Boston Area, MA (ThreadEx) from January 1895 to February 2022. The goal of this script is to forecast the mean average temperature for the next 3 years.
Cleaning the data set
library(tidyverse)
library(readxl)
library(zoo)
tempwide.data <- read_excel("bostemp.xlsx")
bostemp <- tempwide.data %>%
gather(key="month", value="measurement", 2:13) #transform data from long to wide
bostemp$month <- as.double(match(bostemp$month,month.abb)) #convert month abbreviations to numbers
bostemporder <- bostemp[order(bostemp$Year, bostemp$month),] #order by year then by month
bostontemp <- bostemporder %>%
unite(Date, Year, month, sep= "-") #merge year and month column
#remove = FALSE to keep merged columns or not
summary(bostontemp)
Date measurement
Length:1536 Min. :17.50
Class :character 1st Qu.:36.62
Mode :character Median :51.35
Mean :51.14
3rd Qu.:65.78
Max. :78.70
NA's :10
Creating the time series model and plotting the data
library(forecast)
bostemp.ts <- ts(bostontemp$measurement, start = c(1895, 1), end = c(2022, 2), freq = 12)
#plot
plot(bostemp.ts, ylim = c(10, 100), xlab = 'Time (Monthly)', ylab = 'Average Monthly Temperature', bty = "l", xaxt = "n", xlim = c(1893,2026))
axis(1, at = seq(1895, 2025, 1), labels = format(seq(1895, 2025, 1), digits = 4))
Decomposing the data
bostempdata <- decompose(bostemp.ts, "multiplicative")
plot(bostempdata)
Partitioning the data
nValid <- 314
nTrain <- length(bostemp.ts) - nValid
#window function creates subset
train.ts <- window(bostemp.ts, start = c(1895, 1), end = c(1895, nTrain))
valid.ts <- window(bostemp.ts, start = c(1895, nTrain + 1), end = c(1895, nTrain + nValid))
Creating the naive and seasonal naive models
#Naive method
naive.pred <- naive(train.ts, h = nValid)
#Seasonal Naive
snaive.pred <- snaive(train.ts, h = nValid)
Model Testing
#Polynomial with seasonality
btemp.poly.season <- tslm(train.ts ~ trend + I(trend^2) + season)
btemp.poly.season.pred <- forecast(btemp.poly.season, h = nValid, level = 0)
#Regression with trend and seasonality
btemp.season <- tslm(train.ts ~ trend + season)
btemp.season.pred <- forecast(btemp.season, h = nValid, level = 0)
#Regression with seasonality
btemp.lm.season <- tslm(train.ts ~ season)
btemp.lm.season.pred <- forecast(btemp.lm.season, h = nValid, level = 0)
#Holt-Winter
btemp.hw <- ets(train.ts)
btemp.hw.pred <- forecast(btemp.hw, h = nValid)
#ARIMA
btemp.res.arima <- Arima(btemp.season.pred$residuals, order = c(1,0,0))
btemp.res.arima.pred <- forecast(btemp.res.arima, h = nValid)
#AUTOARIMA
btemp.arima <- auto.arima(train.ts)
btemp.arima.pred <- forecast(btemp.arima, h = nValid, level = 0)
#TBATS
bostemp.tbats <- tbats(train.ts)
bostemp.tbats.pred <- forecast(bostemp.tbats, h = nValid, level = 0)
#Model Comparison
accuracy(naive.pred$mean, nValid)
ME RMSE MAE MPE MAPE
Test set 282.3 282.3 282.3 89.90446 89.90446
accuracy(snaive.pred$mean, nValid)
ME RMSE MAE MPE MAPE
Test set 279.4 279.4 279.4 88.98089 88.98089
accuracy(btemp.poly.season.pred$mean, valid.ts)
ME RMSE MAE MPE MAPE ACF1 Theil's U
Test set 0.8636831 3.102232 2.396674 1.40867 5.460257 0.2438544 0.4050522
accuracy(btemp.season.pred$mean, valid.ts)
ME RMSE MAE MPE MAPE ACF1 Theil's U
Test set 0.2428499 2.95461 2.291194 0.09320737 5.309109 0.2260849 0.3940694
accuracy(btemp.lm.season.pred$mean, valid.ts)
ME RMSE MAE MPE MAPE ACF1 Theil's U
Test set 1.382219 3.274637 2.53226 2.512611 5.685839 0.238709 0.4215742
accuracy(btemp.hw.pred$mean, valid.ts)
ME RMSE MAE MPE MAPE ACF1 Theil's U
Test set 0.5923792 3.033062 2.34307 0.8548165 5.386994 0.2353383 0.4003121
accuracy(btemp.res.arima.pred$mean, valid.ts)
ME RMSE MAE MPE MAPE ACF1 Theil's U
Test set 52.13401 54.41552 52.13401 100.0069 100.0069 0.841017 5.405755
accuracy(btemp.arima.pred$mean, valid.ts)
ME RMSE MAE MPE MAPE ACF1 Theil's U
Test set 0.6501215 3.451506 2.703667 1.492573 6.145284 0.2249527 0.46143
accuracy(bostemp.tbats.pred$mean, valid.ts)
ME RMSE MAE MPE MAPE ACF1 Theil's U
Test set 0.822304 3.028563 2.330751 1.335494 5.293352 0.2749108 0.3957969
#Comparing the MAPE, MAE, and RMSE, the TBATS model performs the best on all 3.
Plot of TBATS and Holt-Winter in comparison to the actual data
plot(bostemp.ts, ylim = c(10, 100), xlab = 'Time (Monthly)', ylab = 'Average Monthly Temperature', bty = "l", xaxt = "n", xlim = c(1893,2026))
axis(1, at = seq(1895, 2025, 1), labels = format(seq(1895, 2025, 1), digits = 4))
lines(btemp.hw.pred$mean, col = 'purple')
lines(bostemp.tbats.pred$mean, col = 'red')
Predicting future temperatures
bostemptbat <- tbats(bostemp.ts)
bostempforecast <- forecast(bostemptbat, level=c(95), h=10*12)
plot(bostempforecast, xlab = "Time", ylab = "Temperature F")
summary(bostempforecast)
Forecast method: TBATS(0.999, {1,2}, 0.8, {<12,5>})
Model Information:
TBATS(0.999, {1,2}, 0.8, {<12,5>})
Call: tbats(y = bostemp.ts)
Parameters
Lambda: 0.99858
Alpha: 0.08273003
Beta: -0.02237019
Damping Parameter: 0.800001
Gamma-1 Values: 0.0003818051
Gamma-2 Values: -0.0006294087
AR coefficients: -0.241736
MA coefficients: 0.358005 0.071094
Seed States:
[,1]
[1,] 48.45157689
[2,] 0.12423961
[3,] -21.44109151
[4,] -0.17740660
[5,] -0.55514327
[6,] 0.16003726
[7,] 0.16309122
[8,] -3.62299945
[9,] 0.10989523
[10,] -0.16553959
[11,] 0.06680509
[12,] 0.07902798
[13,] 0.00000000
[14,] 0.00000000
[15,] 0.00000000
attr(,"lambda")
[1] 0.9985798
Sigma: 2.776871
AIC: 14368.11
Error measures:
ME RMSE MAE MPE MAPE MASE ACF1
Training set -0.182021 2.791518 2.150785 -0.9607962 5.206443 0.6867778 -0.003729932
Forecasts: