Class Exercise 16: Chapter 17

Project Objective

Using various forecasting methods: Naive Method, Three-month moving average, and Exponential Smoothing Method

Question 1:

df<-data.frame(week=1:6,
               value=c(17,13,15,11,17,14))

Naive Method (most recent value)

#MAE
forecast_a<-df$value[-length(df$value)] #Exclude the last sale
actual_a<-df$value[-1] #Exclude the first sale
mae_a<-mean(abs(actual_a-forecast_a))
cat("Mean Absolute Error (MAE):", round(mae_a, 2), "\n")
## Mean Absolute Error (MAE): 3.8
#MSE
mse_a<-mean((actual_a-forecast_a)^2)
cat("Mean Squared Error (MSE):", round(mse_a, 2), "\n") 
## Mean Squared Error (MSE): 16.2
#MAPE
mape_a <- mean(abs((actual_a - forecast_a) / actual_a)) * 100 
cat("Mean Absoulte Percentage Error (MAPE)", round(mape_a,2), "%\n")
## Mean Absoulte Percentage Error (MAPE) 27.44 %
#Forecast the value for week 7
forecast_week7_a<-tail(df$value,1)
forecast_week7_a 
## [1] 14
Interpretation: the value projected in week 7 is 14

Question 2:

#Load the required libraries
library(dplyr)
## Warning: 套件 'dplyr' 是用 R 版本 4.3.1 來建造的
## 
## 載入套件:'dplyr'
## 下列物件被遮斷自 'package:stats':
## 
##     filter, lag
## 下列物件被遮斷自 'package:base':
## 
##     intersect, setdiff, setequal, union
library(zoo)
## Warning: 套件 'zoo' 是用 R 版本 4.3.3 來建造的
## 
## 載入套件:'zoo'
## 下列物件被遮斷自 'package:base':
## 
##     as.Date, as.Date.numeric
df1<-data.frame(month=1:12,
               value=c(240, 352, 230, 260, 280, 322, 220, 310, 240, 310, 240, 230))

Time Series plot

plot(df1$month, df1$value, type = "o", col = "blue", 
     xlab = "Contract Month", ylab = "Contract Values (Millions)", 
     main = "Alabama Building Contracts Over 12 Months")

Interpretation:the time series plot exhibits a Seasonal Pattern.

Part A: Three-month moving average

#Manually calculate the Three-month Moving Average
df1$avg_value3 <- c(NA, NA, NA,
                    (df1$value[1] + df1$value[2] + df1$value[3]) / 3,
                    (df1$value[2] + df1$value[3] + df1$value[4]) / 3,
                    (df1$value[3] + df1$value[4] + df1$value[5]) / 3,
                    (df1$value[4] + df1$value[5] + df1$value[6]) / 3,
                    (df1$value[5] + df1$value[6] + df1$value[7]) / 3,
                    (df1$value[6] + df1$value[7] + df1$value[8]) / 3,
                    (df1$value[7] + df1$value[8] + df1$value[9]) / 3,
                    (df1$value[8] + df1$value[9] + df1$value[10]) / 3,
                    (df1$value[9] + df1$value[10] + df1$value[11]) / 3)

#Calculate the squared errors (only for weeks were moving average is available)
df1<-df1 %>%
  mutate(
    squared_error=ifelse(is.na(avg_value3),NA, (value-avg_value3)^2)
  )
#Compute MSE(excluding the initial weeks with NA)
mse<-mean(df1$squared_error, na.rm=TRUE)
mse
## [1] 2040.444

Part B: Exponential Smoothing Method

alpha<-0.2
exp_smooth<-rep(NA, length(df1$value))
exp_smooth[1]<-df1$value[1] #Starting point
for(i in 2: length(df1$value)){
  exp_smooth[i]<-alpha*df1$value[i-1]+(1-alpha)*exp_smooth[i-1]
}
mse_exp_smooth<-mean((df1$value[2:12]-exp_smooth[2:12])^2)
mse_exp_smooth 
## [1] 2593.762

Comparison & Results

#Compared
better_method<- ifelse(mse<mse_exp_smooth, "Three-Month Moving Average", "Exponential Smoothing")

#Results
list(
  MSE_Moving_Average=mse,
  MSE_Exponential_Smoothing=mse_exp_smooth,
  Better_Method=better_method
)
## $MSE_Moving_Average
## [1] 2040.444
## 
## $MSE_Exponential_Smoothing
## [1] 2593.762
## 
## $Better_Method
## [1] "Three-Month Moving Average"

Question 3

library(readxl)
## Warning: 套件 'readxl' 是用 R 版本 4.3.3 來建造的
library(ggplot2)
## Warning: 套件 'ggplot2' 是用 R 版本 4.3.3 來建造的
data <- read_excel("Mortgage.xlsx")

3A

ggplot(data, aes(x=Period, y=Interest_Rate))+
  geom_line()+
  geom_point()+
  xlab("Period")+
  ylab("Interest Rate")+
  ggtitle("Time Series Plot of 30-Year Mortgage Rates")

Interpretation: We observe a decreasing pattern followed by a sudden increase in the time series plot.

3B Develop linear trend equation

model<-lm(Interest_Rate~ Period, data=data)
summary(model)
## 
## Call:
## lm(formula = Interest_Rate ~ Period, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.3622 -0.7212 -0.2823  0.5015  3.1847 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.69541    0.43776  15.295 3.32e-13 ***
## Period      -0.12890    0.03064  -4.207 0.000364 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.039 on 22 degrees of freedom
## Multiple R-squared:  0.4459, Adjusted R-squared:  0.4207 
## F-statistic:  17.7 on 1 and 22 DF,  p-value: 0.0003637
Results: estimated linear trend equation: interest rate=6.70-0.13*Period
The R-square(only one variable)
The overall model is significant as p-value<0.05

3C Forecast the average interest rate for period 25 (i.e., 2024)

forecast_period_25<-predict(model, newdata = data.frame(Period=25))
round(forecast_period_25,2)
##    1 
## 3.47
Interpretation: The forecast number of the average interest rate for period 25 is ~3.47