Question 1

Using the naive method (most recent value) as the forecast for the next week, compute the following measures of forecast accuracy.
# install all libraries
library(readxl)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
#creating the dataset for the question
weeks = 1:6
values = c(17,13,15,11,17,14)
forecastw = values[-length(values)] # exclude last value
actualw = values[-1] # exclude first value

# part A: Mean Absolute error
maew <- mean(abs(actualw - forecastw))
# Mean Absolute error is 3.8

# part B: Mean Squared Error

msew = mean((actualw - forecastw)^2)
# Mean Squared Error is 16.2 

#Part C: Mean absolute percentage error.
mape <- mean(abs((actualw - forecastw) / actualw)) * 100
mape 
## [1] 27.43778
# Mean Absolute Percentage Error is 27.44%

# Part D: Forecast for week 7
forecastm7 = tail(values, 1) # predict next month (new value)
# The forcast for week 7 is 14 

Question 2

The values of Alabama building contracts (in $ millions) for a 12-month period is as follows: 240, 352, 230, 260, 280, 322, 220, 310, 240, 310, 240, 230. Use this data to answer question 2A and 2B below.

    Construct a time series plot. What type of pattern exists in the data? Upload a screenshot of your time series plot.
    Compare the three-month moving average approach with the exponential smoothing forecast using alpha = 0.2. Which provides more accurate forecasts based on MSE? Upload a screenshot of your code outputs with proper explanation of your answers.
# part A: Time series plot
alabuild = data.frame(month=c(1:12),
                 data = c(240,352,230,260,280,322,220,310,240,310,240,230))
plot(alabuild$month, alabuild$data, type='o', col='blue', xlab='Month', ylab='Contract Value', 
     main='Alabama building contracts (in $ millions)')

# Interpretation:  The pattern is a horizontal pattern because of the up and down movement pattern. There is a slight decrease in contract value as the months go on. possibly a decline in value. 

# part B: 3 month moving average
alabuild$avg <- c(NA, NA, NA, # 3 NA values for the first 3 months
                    (alabuild$data[1] + alabuild$data[2] + alabuild$data[3]) / 3,
                    (alabuild$data[2] + alabuild$data[3] + alabuild$data[4]) / 3,
                    (alabuild$data[3] + alabuild$data[4] + alabuild$data[5]) / 3,
                    (alabuild$data[4] + alabuild$data[5] + alabuild$data[6]) / 3,
                    (alabuild$data[5] + alabuild$data[6] + alabuild$data[7]) / 3,
                    (alabuild$data[6] + alabuild$data[7] + alabuild$data[8]) / 3,
                    (alabuild$data[7] + alabuild$data[8] + alabuild$data[9]) / 3,
                    (alabuild$data[8] + alabuild$data[9] + alabuild$data[10]) / 3,
                    (alabuild$data[9] + alabuild$data[10] + alabuild$data[11]) / 3)

# calculate the square errors (only for months where moving average is available). creates new column for sqaured error
alabuild = alabuild %>%
  mutate(
    squarederror = ifelse(is.na(avg), NA, (data - avg)^2)
  )

# compute MSE (excluding the initial months with NA) 
msea = mean(alabuild$squarederror, na.rm = TRUE)
msea # Mean Squared Error is MSE 2040.44
## [1] 2040.444
# exp smoothing
alpha = 0.2
expsmootha = rep(NA, length(alabuild$data))
expsmootha[1] = alabuild$data[1] # starting with first data
for (i in 2:length(alabuild$data)) {
  expsmootha[i] = alpha * alabuild$data[i-1] + (1 - alpha) * expsmootha[i - 1] # equation for exp smoothing
}
mseexpsmootha = mean((alabuild$data[2:12] - expsmootha[2:12])^2) # starting from 2nd
mseexpsmootha # MSE expnentioal smoothing is 2593.76
## [1] 2593.762
# Interpretation: moving average is a better fit since the MSE (2040.444) is lower than the exp. smoothing MSE (2593.76).

Question 3

The following data shows the average interest rate (%) for a 30-year fixed-rate mortgage over a 20-year period.
# Loading the dataset from excel file
mortage = read_excel(file.choose())

# Part A: creating time series plot
ggplot(mortage, aes(x=Year, y=Interest_Rate)) + geom_line() + geom_point()+ xlab("Period") + ylab("Interest Rate ")+
  ggtitle("Interest Rate Over Time")

# interpretation:  time series plot shows an downward trend in interest rates over the years ( periods). At time period 22 we see a skyrocket in interest rates. 

# Part B linear trend 
modelm = lm(Interest_Rate ~ Period, data= mortage)
# 6.70 - .13*Period is the equation for the linear trend model

# Part C: forecast for period 25
forecastperiod25 = predict(modelm, newdata = data.frame(Period = 25))
forecastperiod25 
##        1 
## 3.472942
# 3.47 is forecasted interest rate for period 25