# Set the CRAN mirror used by the install.packages() calls in this document.
# One call is enough; the option stays set for the rest of the session.
options(repos = c(CRAN = "https://cloud.r-project.org"))

Question 1. Naive Approach

Project Objective

To forecast the value for the next week (week 7) using the Naive method (Most Recent Value)

Step 1. Input the data

# Time index: weeks 1 through 6 (the independent variable).
week <- seq_len(6)
# Observed value for each week (the dependent variable).
values <- c(17, 13, 15, 11, 17, 14)
Data Description: A description of the features is presented in the table below
Variable       | Definition
1.Week         | Time period by weeks
2.Value        | Coordinated value for each week from week 1 to week 6

Step 2. Exclude the Last and First Value

# Naive method: the forecast for week t is the observation from week t - 1,
# so the forecasts (weeks 1..n-1) line up with the actuals (weeks 2..n).
forecast_a <- head(values, -1) # every value except the last
actual_a <- tail(values, -1) # every value except the first

Step 3. Find the MSE (Mean Squared Error)

# Mean squared error of the naive forecasts: average of the squared
# differences between each actual value and its forecast.
mse_a <- local({
  naive_error <- actual_a - forecast_a
  mean(naive_error^2)
})
print(mse_a)
## [1] 16.2
Interpretation:
The Mean Squared error is 16.2

Step 4. Forecast the value for week 7

# The naive forecast for the next period is simply the most recent observation.
forecast_week7_a <- values[length(values)]
print(forecast_week7_a)
## [1] 14
Interpretation: The value for week 7 is 14

Step 5. Find the MAE (Mean Absolute Error)

# Mean absolute error of the naive forecasts: average size of the forecast
# errors, ignoring their sign.
mae_a <- local({
  abs_error <- abs(actual_a - forecast_a)
  mean(abs_error)
})
print(mae_a)
## [1] 3.8
Interpretation: the Mean Absolute Error is 3.8

Step 6. Find the Mean Absolute Percentage Error (MAPE)

# Mean absolute percentage error: each absolute error is expressed relative
# to the size of its actual value, averaged, then scaled to a percentage.
mape_a <- local({
  relative_error <- abs(actual_a - forecast_a) / abs(actual_a)
  100 * mean(relative_error)
})
print(mape_a)
## [1] 27.43778
Interpretation: The MAPE is 27.44

Question 2. Moving Average and Exponential Smoothing

Project Objective

To Determine which Forecast is better to use between Moving Average and Exponential Smoothing

Part A. Moving Average

Step 1. Install and load the packages

# Install dplyr only when it is missing. Reinstalling a package that is
# already installed is slow and, as the warnings recorded in the output below
# show, can fail with "Permission denied" on Windows.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
## Installing package into 'C:/Users/DELL/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'dplyr' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'dplyr'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
## C:\Users\DELL\AppData\Local\R\win-library\4.4\00LOCK\dplyr\libs\x64\dplyr.dll
## to C:\Users\DELL\AppData\Local\R\win-library\4.4\dplyr\libs\x64\dplyr.dll:
## Permission denied
## Warning: restored 'dplyr'
## 
## The downloaded binary packages are in
##  C:\Users\DELL\AppData\Local\Temp\RtmpwvM4vv\downloaded_packages
# Install zoo only when it is missing. Reinstalling a package that is already
# installed is slow and, as the warnings recorded in the output below show,
# can fail with "Permission denied" on Windows.
if (!requireNamespace("zoo", quietly = TRUE)) {
  install.packages("zoo")
}
## Installing package into 'C:/Users/DELL/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'zoo' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'zoo'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
## C:\Users\DELL\AppData\Local\R\win-library\4.4\00LOCK\zoo\libs\x64\zoo.dll to
## C:\Users\DELL\AppData\Local\R\win-library\4.4\zoo\libs\x64\zoo.dll: Permission
## denied
## Warning: restored 'zoo'
## 
## The downloaded binary packages are in
##  C:\Users\DELL\AppData\Local\Temp\RtmpwvM4vv\downloaded_packages
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(zoo)
## Warning: package 'zoo' was built under R version 4.4.2
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

Step 2. Input the Time Series Data

# Twelve monthly observations: `month` is the time index (kept as double,
# matching a c(1, 2, ...) literal) and `values` is the observed series.
df <- data.frame(
  month = as.numeric(1:12),
  values = c(240, 352, 230, 260, 280, 322, 220, 310, 240, 310, 240, 230)
)
Data Description
Variable       | Definition
Month          | The time period over 12 months
Values         | The monetary value of Alabama building contracts (in $ millions)

Step 3. Descriptive Statistics

# Five-number summary plus the mean for every column of the data frame.
print(summary(df))
##      month           values     
##  Min.   : 1.00   Min.   :220.0  
##  1st Qu.: 3.75   1st Qu.:237.5  
##  Median : 6.50   Median :250.0  
##  Mean   : 6.50   Mean   :269.5  
##  3rd Qu.: 9.25   3rd Qu.:310.0  
##  Max.   :12.00   Max.   :352.0
Interpretation: The mean is 269.5, Median is 250.0

Step 4. Time Series Plot

# Time series plot: points joined by lines (type = "o"), month on the x-axis
# and the observed value on the y-axis.
with(
  df,
  plot(
    month, values,
    type = "o", col = "blue",
    xlab = "Month", ylab = "$ Millions",
    main = "Values of Alabama Building contracts in 12 months"
  )
)

Interpretation: The time series plot exhibits a horizontal pattern, as the values fluctuate around a constant mean with no visible trend

Step 5. Manually calculate the Three-Month Moving Average

# Three-month moving average forecast: the forecast for month t is the mean of
# the three preceding months (t - 3, t - 2, t - 1). The first three months have
# no complete window and are NA.
# This is computed from the data instead of being written out by hand, so it
# stays correct if the number of rows in `df` changes.
df$avg_values3 <- local({
  window <- 3L
  # embed() puts one complete window per row, so rowMeans() gives the mean of
  # every run of `window` consecutive observations.
  trailing_means <- rowMeans(embed(df$values, window))
  # The last window would forecast the month after the series ends, so it is
  # dropped; the leading NAs cover the months without a full history.
  c(rep(NA_real_, window), head(trailing_means, -1L))
})

Step 6. Calculate the Squared Errors (only for months where a moving average is available)

# Squared forecast error per month. Months without a moving average are NA
# because arithmetic on NA yields NA.
df[["squared_error"]] <- with(df, (values - avg_values3)^2)

Step 7. Compute Mean Squared Error (MSE) (excluding the initial months with NA)

# MSE of the moving-average forecasts, averaged over the months that actually
# have a forecast (rows with an NA squared error are dropped first).
mse <- local({
  has_forecast <- !is.na(df$squared_error)
  mean(df$squared_error[has_forecast])
})
print(mse)
## [1] 2040.444
Interpretation: The MSE is 2040.44

Part B. Exponential Smoothing

Step 8. Input the alpha value (smoothing constant)

# Smoothing constant: the weight given to the most recent observation in the
# exponential smoothing recursion below (the previous forecast gets 1 - alpha).
alpha <- 0.2

Step 9. Find the Mean Squared Error (MSE)

# Exponential smoothing forecasts:
#   F[1] = Y[1]                                        (seed with the first value)
#   F[t] = alpha * Y[t - 1] + (1 - alpha) * F[t - 1]   for t >= 2
exp_smooth <- rep(NA_real_, length(df$values))
exp_smooth[1] <- df$values[1]
# seq_along(...)[-1] is empty for a one-element series, whereas
# 2:length(x) would count down (2, 1) and index out of range.
for (i in seq_along(df$values)[-1]) {
  exp_smooth[i] <- alpha * df$values[i - 1] + (1 - alpha) * exp_smooth[i - 1]
}
# MSE over every period from the second onward (the first forecast is only the
# seed). Dropping the first element replaces the hard-coded 2:12, so the
# evaluation window follows the length of the series.
mse_exp_smooth <- mean((df$values[-1] - exp_smooth[-1])^2)
mse_exp_smooth
## [1] 2593.762
Interpretation: The MSE is 2593.76

Step 10. Comparison

# Pick the method with the smaller MSE. The comparison is a single TRUE/FALSE,
# so a plain if/else is used in place of the vectorised ifelse().
better_method <- if (mse < mse_exp_smooth) {
  "Three-Month Moving Average"
} else {
  "Exponential Smoothing"
}
# List the result: both error measures next to the selected method.
list(
  MSE_Moving_Average = mse,
  MSE_Exponential_Smoothing = mse_exp_smooth,
  Better_Method = better_method
)
## $MSE_Moving_Average
## [1] 2040.444
## 
## $MSE_Exponential_Smoothing
## [1] 2593.762
## 
## $Better_Method
## [1] "Three-Month Moving Average"
Interpretation: The Three-Month Moving Average provides more accurate forecasts than Exponential Smoothing because it has a lower MSE (2040.44 < 2593.76), making this a better method to minimize errors and forecast more accurately

Linear Trend Regression Approach

Project Objective

Forecast the average mortgage interest rate for the next period (the year 2024)

Question 3. Construct Time Series Plot and define its pattern

Step 1. Install and Load the packages

# Install ggplot2 only when it is missing, so the package is not downloaded
# and reinstalled every time the document is run.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
## Installing package into 'C:/Users/DELL/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'ggplot2' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\DELL\AppData\Local\Temp\RtmpwvM4vv\downloaded_packages
# Install readxl only when it is missing, so the package is not downloaded
# and reinstalled every time the document is run.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
## Installing package into 'C:/Users/DELL/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'readxl' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\DELL\AppData\Local\Temp\RtmpwvM4vv\downloaded_packages
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.2
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.2

Step 2. Import the data

# Read the mortgage workbook from the working directory. Note that this
# rebinds `df`, replacing the data frame used in the earlier sections.
df <- read_excel(path = "Mortgage.xlsx")
print(df)
## # A tibble: 24 × 3
##    Year                Period Interest_Rate
##    <dttm>               <dbl>         <dbl>
##  1 2000-01-01 00:00:00      1          8.05
##  2 2001-01-01 00:00:00      2          6.97
##  3 2002-01-01 00:00:00      3          6.54
##  4 2003-01-01 00:00:00      4          5.83
##  5 2004-01-01 00:00:00      5          5.84
##  6 2005-01-01 00:00:00      6          5.87
##  7 2006-01-01 00:00:00      7          6.41
##  8 2007-01-01 00:00:00      8          6.34
##  9 2008-01-01 00:00:00      9          6.03
## 10 2009-01-01 00:00:00     10          5.04
## # ℹ 14 more rows
# Column names of the imported data.
names(df)
## [1] "Year"          "Period"        "Interest_Rate"
Data Description
Variable     |Definition
Period       | Time period in years

Step 3. Descriptive Statistics

# Per-column summary statistics for the imported data.
print(summary(df))
##       Year                         Period      Interest_Rate  
##  Min.   :2000-01-01 00:00:00   Min.   : 1.00   Min.   :2.958  
##  1st Qu.:2005-10-01 18:00:00   1st Qu.: 6.75   1st Qu.:3.966  
##  Median :2011-07-02 12:00:00   Median :12.50   Median :4.863  
##  Mean   :2011-07-02 18:00:00   Mean   :12.50   Mean   :5.084  
##  3rd Qu.:2017-04-02 06:00:00   3rd Qu.:18.25   3rd Qu.:6.105  
##  Max.   :2023-01-01 00:00:00   Max.   :24.00   Max.   :8.053
Interpretation: On average, the mortgage interest rate over the 24-year period (2000–2023) is 5.08

Step 4. Construct a time series plot

# Time series plot of the interest rate against the period index: a line with
# the individual observations marked as points.
ggplot(data = df, mapping = aes(x = Period, y = Interest_Rate)) +
  geom_line() +
  geom_point() +
  labs(
    x = "Period",
    y = "Interest Rate",
    title = "Interest Rate of Mortgage"
  )

Interpretation: The Time Series Plot exhibits a Trend Pattern as there are gradual shifts to lower values over a long period of time. We observe a decreasing pattern or trend in this time series plot.

Question 4. Develop the linear trend equation for this time series

Step 5. Develop a linear trend equation

# Linear trend: regress the interest rate on the period index by ordinary
# least squares, then print the coefficient table and fit statistics.
model <- lm(formula = Interest_Rate ~ Period, data = df)
print(summary(model))
## 
## Call:
## lm(formula = Interest_Rate ~ Period, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.3622 -0.7212 -0.2823  0.5015  3.1847 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.69541    0.43776  15.295 3.32e-13 ***
## Period      -0.12890    0.03064  -4.207 0.000364 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.039 on 22 degrees of freedom
## Multiple R-squared:  0.4459, Adjusted R-squared:  0.4207 
## F-statistic:  17.7 on 1 and 22 DF,  p-value: 0.0003637
Interpretation: The Estimated linear trend equation: interest rate = 6.70 - 0.13*Period OR T_hat = 6.70 - 0.13*t
# The R-Square is 0.45 (Moderately fits the data)
# The overall model is significant as p-value < 0.05

Question 5. Forecast the Average interest rate for the period 25.

Step 6. To find the MSE and MAPE values

# In-sample fitted values from the trend model, one per observed period.
df[["predicted_interest_rat"]] <- predict(model)

# Residual = observed rate minus fitted rate.
df[["residuals"]] <- df[["Interest_Rate"]] - df[["predicted_interest_rat"]]

# Mean squared error: average of the squared residuals. This rebinds `mse`,
# which earlier held the moving-average MSE.
mse <- mean(df[["residuals"]]^2)
cat("Mean Squared Error (MSE):", mse, "\n")
## Mean Squared Error (MSE): 0.989475
# BONUS SECTION: Mean Absolute Percentage Error (MAPE).
# Each residual is expressed as a percentage of the observed rate, then the
# percentages are averaged.
df[["percentage_error"]] <- 100 * abs(df[["residuals"]] / df[["Interest_Rate"]])
mape <- mean(df[["percentage_error"]])
cat("Mean Absolute Percentage Error (MAPE)", mape, "%\n")
## Mean Absolute Percentage Error (MAPE) 15.79088 %
Interpretation: The MSE is 0.99. The MAPE is 15.79%

Step 7. Forecast the interest rate for Period 25 (i.e., the year 2024)

# Extrapolate the fitted trend line one step past the data by evaluating the
# model at Period = 25.
forecast_period_25 <- predict(
  object = model,
  newdata = data.frame(Period = 25)
)
print(forecast_period_25)
##        1 
## 3.472942
Interpretation: The forecasted average mortgage interest rate for 2024 (Period 25) is 3.47