Class Exercise 16

Question 1: Using the naive method as the forecast

time series data

week <- 1:6 #independent variable - time
value <- c(17, 13, 15, 11, 17, 14) #dependent variable

most recent value as forecast

forecast_a <- value[-length(value)] #excludes the last value
actual_a <- value[-1] #exclude the first value

mean absolute error

absolute_errors <- abs(actual_a - forecast_a)
mae <- mean(absolute_errors)
mae
## [1] 3.8

mean squared error

mse_a <- mean((actual_a - forecast_a)^2)
mse_a 
## [1] 16.2
Interpretation: Mean square error is 16.2

mean absolute percentage error

percentage_errors <- abs(actual_a - forecast_a)/ actual_a * 100
mape <- mean(percentage_errors)
mape
## [1] 27.43778

forecast the sales for week 7

forecast_week7_a <- tail(value, 1)
forecast_week7_a
## [1] 14
Interpretation: the value projected in week 7 is 14.

Question 2: Alabama building contract values

load libraries

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

import data

df <- data.frame(month=c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12),
                 data=c(240, 352, 230, 260, 280, 322, 220, 310, 240, 310, 240, 230))

descriptive statistics

summary(df)
##      month            data      
##  Min.   : 1.00   Min.   :220.0  
##  1st Qu.: 3.75   1st Qu.:237.5  
##  Median : 6.50   Median :250.0  
##  Mean   : 6.50   Mean   :269.5  
##  3rd Qu.: 9.25   3rd Qu.:310.0  
##  Max.   :12.00   Max.   :352.0
Interpretation: the average contract over the 12-month period is 269.5

Part A: Construct a time series plot

plot(df$month, df$data, type = "o", col = "blue", xlab = "Month", ylab = "Value",
     main = "Alabama Building Contract Value")

Interpretation: the time series plot exhibits a scattered pattern. There was a spike in value in the second month and then also a constant up and down between the sixth and eleventh month.

three-month moving average

df$avg_data3 <- c(NA, NA, NA,
                  (df$data[1] + df$data[2] + df$data[3]) / 3,
                  (df$data[2] + df$data[3] + df$data[4]) / 3,
                  (df$data[3] + df$data[4] + df$data[5]) / 3,
                  (df$data[4] + df$data[5] + df$data[6]) / 3,
                  (df$data[5] + df$data[6] + df$data[7]) / 3,
                  (df$data[6] + df$data[7] + df$data[8]) / 3,
                  (df$data[7] + df$data[8] + df$data[9]) / 3,
                  (df$data[8] + df$data[9] + df$data[10]) / 3,
                  (df$data[9] + df$data[10] + df$data[11]) / 3
                  )

calculate the squared errors (only for months were moving average is available)

df <- df %>%
  mutate(
    squared_error = ifelse(is.na(avg_data3), NA, (data - avg_data3)^2)
    )

compute mse (excluding the initial months with NA)

mse <- mean(df$squared_error, na.rm = TRUE)
mse
## [1] 2040.444

exponential smoothing

alpha <- 0.2
exp_smooth <- rep(NA, length(df$data))
exp_smooth[1] <- df$data[1] #starting point
for(i in 2: length(df$data)) {
  exp_smooth[i] <- alpha * df$data[i-1] + (1 - alpha) * exp_smooth[i-1]
}
mse_exp_smooth <- mean((df$sales[2:12] - exp_smooth[2:12])^2)
mse_exp_smooth
## [1] NaN

Part B: Compare the three-month moving average with the exponential smoothing forecast

better_method <- ifelse (mse < mse_exp_smooth, "Three-Month Moving Verage", "Exponential Smoothing")
# results
list(
  mse_moving_average = mse,
  mse_exponential_smoothing = mse_exp_smooth,
  better_method = better_method
) 
## $mse_moving_average
## [1] 2040.444
## 
## $mse_exponential_smoothing
## [1] NaN
## 
## $better_method
## [1] NA
output the mse - NA
Interpretation: MSE moving average gave an output of 2040.44 while exponential smoothing gave us an output of NaN. NaN means that it is an undefined result. As there is an undefined result, the better method was not able to compare the moving average with the exponential smoothing, giving us an output of NA.

Question 3: FreddieMac Mortgage Interest Rates

load libraries and data

library(readxl)
library(ggplot2)

df <- read_excel("Mortgage.xlsx")

summarize data

summary(df)
##       Year                         Period      Interest_Rate  
##  Min.   :2000-01-01 00:00:00   Min.   : 1.00   Min.   :2.958  
##  1st Qu.:2005-10-01 18:00:00   1st Qu.: 6.75   1st Qu.:3.966  
##  Median :2011-07-02 12:00:00   Median :12.50   Median :4.863  
##  Mean   :2011-07-02 18:00:00   Mean   :12.50   Mean   :5.084  
##  3rd Qu.:2017-04-02 06:00:00   3rd Qu.:18.25   3rd Qu.:6.105  
##  Max.   :2023-01-01 00:00:00   Max.   :24.00   Max.   :8.053
Interpretation: the average interest rate over a 24 year period is 5.08

Part A: Construct a time series plot

ggplot(df, aes(x = Period, y = Interest_Rate)) +
  geom_line() +
  geom_point() + 
  xlab("Period") +
  ylab("Interest_Rates") +
  ggtitle("FreddieMac Mortgage Interest Rates")

Interpretation: Can observe a decreasing trend in the time series plot.

Part B: Develop a linear trend equation

model <- lm(Interest_Rate ~ Period, data = df)
summary(model)
## 
## Call:
## lm(formula = Interest_Rate ~ Period, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.3622 -0.7212 -0.2823  0.5015  3.1847 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.69541    0.43776  15.295 3.32e-13 ***
## Period      -0.12890    0.03064  -4.207 0.000364 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.039 on 22 degrees of freedom
## Multiple R-squared:  0.4459, Adjusted R-squared:  0.4207 
## F-statistic:  17.7 on 1 and 22 DF,  p-value: 0.0003637
Estimates linear trend equation: Interest Rate = 6.70 + -0.13*Period
T-hat = 6.70 + -0.13*t
The R-square is 0.45 (moderately fits the data)
The overall model is significant as p-value < 0.05

Part C: Forecast the average interest rate for period 25

forecast_period_25 <- predict(model, newdata = data.frame(Period = 25))
forecast_period_25
##        1 
## 3.472942
Interpretation: The forecasted average interest rate for period 25 is 3.47