Question 1
Using the naive method (most recent value) as the forecast for the next week, compute the following measures of forecast accuracy.
# install all libraries
library(readxl)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
#creating the dataset for the question
weeks = 1:6
values = c(17,13,15,11,17,14)
forecastw = values[-length(values)] # exclude last value
actualw = values[-1] # exclude first value
# part A: Mean Absolute error
maew <- mean(abs(actualw - forecastw))
# Mean Absolute error is 3.8
# part B: Mean Squared Error
msew = mean((actualw - forecastw)^2)
# Mean Squared Error is 16.2
#Part C: Mean absolute percentage error.
mape <- mean(abs((actualw - forecastw) / actualw)) * 100
mape
## [1] 27.43778
# Mean Absolute Percentage Error is 27.44%
# Part D: Forecast for week 7
forecastm7 = tail(values, 1) # predict next month (new value)
# The forcast for week 7 is 14
Question 2
The values of Alabama building contracts (in $ millions) for a 12-month period is as follows: 240, 352, 230, 260, 280, 322, 220, 310, 240, 310, 240, 230. Use this data to answer question 2A and 2B below.
Construct a time series plot. What type of pattern exists in the data? Upload a screenshot of your time series plot.
Compare the three-month moving average approach with the exponential smoothing forecast using alpha = 0.2. Which provides more accurate forecasts based on MSE? Upload a screenshot of your code outputs with proper explanation of your answers.
# part A: Time series plot
alabuild = data.frame(month=c(1:12),
data = c(240,352,230,260,280,322,220,310,240,310,240,230))
plot(alabuild$month, alabuild$data, type='o', col='blue', xlab='Month', ylab='Contract Value',
main='Alabama building contracts (in $ millions)')

# Interpretation: The pattern is a horizontal pattern because of the up and down movement pattern. There is a slight decrease in contract value as the months go on. possibly a decline in value.
# part B: 3 month moving average
alabuild$avg <- c(NA, NA, NA, # 3 NA values for the first 3 months
(alabuild$data[1] + alabuild$data[2] + alabuild$data[3]) / 3,
(alabuild$data[2] + alabuild$data[3] + alabuild$data[4]) / 3,
(alabuild$data[3] + alabuild$data[4] + alabuild$data[5]) / 3,
(alabuild$data[4] + alabuild$data[5] + alabuild$data[6]) / 3,
(alabuild$data[5] + alabuild$data[6] + alabuild$data[7]) / 3,
(alabuild$data[6] + alabuild$data[7] + alabuild$data[8]) / 3,
(alabuild$data[7] + alabuild$data[8] + alabuild$data[9]) / 3,
(alabuild$data[8] + alabuild$data[9] + alabuild$data[10]) / 3,
(alabuild$data[9] + alabuild$data[10] + alabuild$data[11]) / 3)
# calculate the square errors (only for months where moving average is available). creates new column for sqaured error
alabuild = alabuild %>%
mutate(
squarederror = ifelse(is.na(avg), NA, (data - avg)^2)
)
# compute MSE (excluding the initial months with NA)
msea = mean(alabuild$squarederror, na.rm = TRUE)
msea # Mean Squared Error is MSE 2040.44
## [1] 2040.444
# exp smoothing
alpha = 0.2
expsmootha = rep(NA, length(alabuild$data))
expsmootha[1] = alabuild$data[1] # starting with first data
for (i in 2:length(alabuild$data)) {
expsmootha[i] = alpha * alabuild$data[i-1] + (1 - alpha) * expsmootha[i - 1] # equation for exp smoothing
}
mseexpsmootha = mean((alabuild$data[2:12] - expsmootha[2:12])^2) # starting from 2nd
mseexpsmootha # MSE expnentioal smoothing is 2593.76
## [1] 2593.762
# Interpretation: moving average is a better fit since the MSE (2040.444) is lower than the exp. smoothing MSE (2593.76).
Question 3
The following data shows the average interest rate (%) for a 30-year fixed-rate mortgage over a 20-year period.
# Loading the dataset from excel file
mortage = read_excel(file.choose())
# Part A: creating time series plot
ggplot(mortage, aes(x=Year, y=Interest_Rate)) + geom_line() + geom_point()+ xlab("Period") + ylab("Interest Rate ")+
ggtitle("Interest Rate Over Time")

# interpretation: time series plot shows an downward trend in interest rates over the years ( periods). At time period 22 we see a skyrocket in interest rates.
# Part B linear trend
modelm = lm(Interest_Rate ~ Period, data= mortage)
# 6.70 - .13*Period is the equation for the linear trend model
# Part C: forecast for period 25
forecastperiod25 = predict(modelm, newdata = data.frame(Period = 25))
forecastperiod25
## 1
## 3.472942
# 3.47 is forecasted interest rate for period 25