** Class Exercise 16: Basic Time Series Forecasting Methods**

Project Objective

To use naive method, moving average and exponential smoothing to calculate MSE
and forecast accuracy.

Example 1: Naive Apporach

Time Series data

months <- 1:6
values <- c(17, 13, 15, 11, 17, 14)

Part A. Most recent value as forecast

forecasts_a <- values[-length(values)] #exclude the last value 
actual_a <- values[-1] #exclude the first value 
mse_a <- mean((actual_a - forecasts_a)^2) 
mse_a #result: 16.2
## [1] 16.2
forecast_month_7_a <- tail(values, 1)
forecast_month_7_a # 14
## [1] 14

Calculate the mean absolute error and mean absolute percentage error

# Calculate Mean Absolute Error (MAE)
mae <- mean(abs(actual_a - forecasts_a))
mae #3.8
## [1] 3.8
# Calculate Mean Absolute Percentage Error (MAPE)
mape <- mean(abs((actual_a - forecasts_a) / actual_a)) * 100
mape #27.44
## [1] 27.43778

Part B. Average all of the data as forecast

cumulative_averages <- cumsum(values[-length(values)]) / (1:(length(values) - 1))
cumulative_averages
## [1] 17.0 15.0 15.0 14.0 14.6
forecasts_b <- cumulative_averages
actual_b <- values[-1] #exclude the first value 
mse_b <- mean((actual_b - forecasts_b)^2)
mse_b #8.27
## [1] 8.272
forecast_month_7_b <- mean(values) #average of all months as forecast for month 7 
forecast_month_7_b # 14.5 
## [1] 14.5

Part C. Which method is better

better_method <- ifelse(mse_a < mse_b, "Most Recent Value", "Average of All Data")
list(
  MSE_most_recent_value = mse_a,
  forecast_month_7_most_recent = forecast_month_7_a,
  MSE_average_of_all_data = mse_b,
  forecast_month_7_average = forecast_month_7_b,
  Better_Method = better_method
) #Result: Better method is the average of all data 
## $MSE_most_recent_value
## [1] 16.2
## 
## $forecast_month_7_most_recent
## [1] 14
## 
## $MSE_average_of_all_data
## [1] 8.272
## 
## $forecast_month_7_average
## [1] 14.5
## 
## $Better_Method
## [1] "Average of All Data"

Example 2: Three-month moving average and exponential smoothing forecast

Install some packages

options(repos = c(CRAN = "https://cran.rstudio.com"))

# Install packages
install.packages("dplyr")
## 
## The downloaded binary packages are in
##  /var/folders/5f/2vqvsqc91055s6nc4m0ldk2r0000gn/T//RtmpOFEZFQ/downloaded_packages
install.packages("zoo")
## 
## The downloaded binary packages are in
##  /var/folders/5f/2vqvsqc91055s6nc4m0ldk2r0000gn/T//RtmpOFEZFQ/downloaded_packages
# Load libraries
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

Import the data

df <- data.frame(month=c(1,2,3,4,5,6,7,8,9,10,11,12),
                 data=c(240, 352, 230, 260, 280, 322, 220, 310, 240, 310, 240, 230))

Part A. Time series plot

plot(df$month, df$data, type = "o", col = "blue", xlab = "Month", ylab = "Values",
     main = "The values of Alabama building contracts")

Interpretation: The time-series plot exhibits a seasonal pattern, with peak values in month 2, 6, 8 and 10 and it seems to repeat monthly.

Part B. Three-month moving average

df$avg_value <- c(NA, NA, NA,
                     (df$data[1] + df$data[2] + df$data[3]) / 3,
                     (df$data[2] + df$data[3] + df$data[4]) / 3,
                     (df$data[3] + df$data[4] + df$data[5]) / 3,
                     (df$data[4] + df$data[5] + df$data[6]) / 3,
                     (df$data[5] + df$data[6] + df$data[7]) / 3,
                     (df$data[6] + df$data[7] + df$data[8])/ 3,
                     (df$data[7] + df$data[8] + df$data[9]) / 3,
                     (df$data[8] + df$data[9] + df$data[10]) / 3,
                     (df$data[9] + df$data[10] + df$data[11]) / 3)

Calculate the squared errors( only for the months where moving average is available)

df <- df %>%
  mutate(
    squared_error = ifelse(is.na(avg_value), NA, (data - avg_value)^2)
  )

Compute MSE (excluding the initial months with NA)

mse <- mean(df$squared_error, na.rm = TRUE)
mse #2040.44
## [1] 2040.444

Exponential smoothing

alpha = 0.2
exp_smooth <- rep(NA, length(df$data))
exp_smooth[1] <- df$data[1] #starting point 
for(i in 2:length(df$data)) {
  exp_smooth[i] <- alpha * df$data[i-1] + (1 - alpha)* exp_smooth[i-1]
}
mse_exp_smooth <- mean((df$data[2:12] - exp_smooth[2:12])^2)
mse_exp_smooth # 2593.76
## [1] 2593.762

Compare the three-month moving average and exponential smoothing

better_method <- ifelse(mse < mse_exp_smooth, "Three-month moving average", "Exponential smoothing")
list(
  MSE_moving_average= mse,
  MSE_exponential_smoothing = mse_exp_smooth,
  Better_Method = better_method
) #Result: Better method - Three-month moving average 
## $MSE_moving_average
## [1] 2040.444
## 
## $MSE_exponential_smoothing
## [1] 2593.762
## 
## $Better_Method
## [1] "Three-month moving average"

Example 3: The average interest rate (%) for a 30-year fixed-rate mortgage over a 20-year period

Load the packages and data

library(readxl)
library(ggplot2)

# Load the data 
df1 <- read_excel("Mortgage.xlsx")

Part A. Time series plot

ggplot(df1, aes(x = Period, y = Interest_Rate))+ 
  geom_line() + 
  geom_point() + 
  xlab("Period") +
  ylab("Interest_Rate (%") + 
  ggtitle("Time Series Plot of Mortgage")

Interpretation: We observe a decreasing trend in the first half of time series plot with a sharp increase towards the latest period. It could be a cyclical pattern.

Part B. Develop a linear trend equation

model <- lm(Interest_Rate ~ Period, data = df1)
summary(model)
## 
## Call:
## lm(formula = Interest_Rate ~ Period, data = df1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.3622 -0.7212 -0.2823  0.5015  3.1847 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.69541    0.43776  15.295 3.32e-13 ***
## Period      -0.12890    0.03064  -4.207 0.000364 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.039 on 22 degrees of freedom
## Multiple R-squared:  0.4459, Adjusted R-squared:  0.4207 
## F-statistic:  17.7 on 1 and 22 DF,  p-value: 0.0003637
Interpretation: linear trend equation: Revenue = 6.70 - 0.13*Period 

Part C. Forecast for period 25

forecast_period_25 <- predict(model, newdata = data.frame(Period = 25))
forecast_period_25 # 3.47
##        1 
## 3.472942
Interpretation: Using the linear trend equation from question 3B, we can
forecast the average interest rate for period 25 is 3.47.