Question 1: Native Approach

Time Series Data

week <- 1:6 #This is the independent variable - time
values <- c(17, 13, 15, 11, 17, 14) #dependent variable

Part A: Most Recent Value as Forecast

forecast_a <- values[-length(values)] #Excludes the last value
actual_a <- values[-1] #Exclude the first sale
mae_a <- mean(abs(actual_a - forecast_a)) # Mean absolute error
mae_a #Mean absolute error is 16.2
## [1] 3.8
mse_a <- mean((actual_a - forecast_a)^2)
mse_a #Mean square error is 16.2
## [1] 16.2
mape_a <- mean(abs((actual_a - forecast_a) / actual_a)) * 100 # Mean absolute percentage error
mape_a #Mean absolute percentage error is 16.2
## [1] 27.43778

####Forecast the values for week 7

forecast_week7_a <- tail(values, 1)
forecast_week7_a
## [1] 14

###Interpretation: The number to be sold in week 7 is 14

Question 2: Moving Average and Exponential Smoothing Approach

Part A. Moving Average

#install.packages("dplyr")
#install.packages("zoo")

library(dplyr)
## Warning: 套件 'dplyr' 是用 R 版本 4.4.2 來建造的
## 
## 載入套件:'dplyr'
## 下列物件被遮斷自 'package:stats':
## 
##     filter, lag
## 下列物件被遮斷自 'package:base':
## 
##     intersect, setdiff, setequal, union
library(zoo)
## Warning: 套件 'zoo' 是用 R 版本 4.4.2 來建造的
## 
## 載入套件:'zoo'
## 下列物件被遮斷自 'package:base':
## 
##     as.Date, as.Date.numeric

Time Series Data

df <- data.frame(month=c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12),
                 values=c(240, 352, 230, 260, 280, 322, 220, 310, 240, 310, 240, 230))

Descriptive statistics

summary(df)
##      month           values     
##  Min.   : 1.00   Min.   :220.0  
##  1st Qu.: 3.75   1st Qu.:237.5  
##  Median : 6.50   Median :250.0  
##  Mean   : 6.50   Mean   :269.5  
##  3rd Qu.: 9.25   3rd Qu.:310.0  
##  Max.   :12.00   Max.   :352.0

Interpretation: The average values over the 12-month period is 269.5

Time series plot

plot(df$month, df$values, type = "o", col = "blue", xlab = "Month", ylab = "Values", main = "Alabama Building Values Plot")

Interpretation: This time series plot shows the volatility of the data, varying from 220 to 352, indicating that the values have not stabilized around the mean.

Part A. Moving Average

Manually calculate the Three-Week Moving Average

df$avg_values3 <- c(NA, NA, NA,
                    (df$values[1] + df$values[2] + df$values[3]) / 3,
                    (df$values[2] + df$values[3] + df$values[4]) / 3,
                    (df$values[3] + df$values[4] + df$values[5]) / 3,
                    (df$values[4] + df$values[5] + df$values[6]) / 3,
                    (df$values[5] + df$values[6] + df$values[7]) / 3,
                    (df$values[6] + df$values[7] + df$values[8]) / 3,
                    (df$values[7] + df$values[8] + df$values[9]) / 3,
                    (df$values[8] + df$values[9] + df$values[10]) / 3,
                    (df$values[9] + df$values[10] + df$values[11]) / 3
                    )

Calculation the squared errors (only for months were moving average is available)

df <- df %>%
  mutate(
    squared_error = ifelse(is.na(avg_values3), NA, (values - avg_values3)^2)
  )

Compute MSE (excluding the initial months with NA)

mse <- mean(df$squared_error, na.rm = TRUE)
mse #Output the MSE = 2040.44
## [1] 2040.444

Part B. Exponential Smoothing

alpha <- 0.2
exp_smooth <- rep(NA, length(df$values))
exp_smooth[1] <- df$values[1] #Starting point
for(i in 2: length(df$values)) {
  exp_smooth[i] <- alpha * df$values[i-1] + (1 - alpha) * exp_smooth[i-1]
}
mse_exp_smooth <- mean((df$values[2:12] - exp_smooth[2:12])^2)
mse_exp_smooth #Outpot the MSE = 2536.44
## [1] 2593.762

Comparison

better_method <- ifelse(mse < mse_exp_smooth, "Three-Month Moving Average", "Exponential Smoothing")

Results

list(
  MSE_Moving_Average = mse,
  MSE_Exponential_Smoothing = mse_exp_smooth,
  Better_Method = better_method
)
## $MSE_Moving_Average
## [1] 2040.444
## 
## $MSE_Exponential_Smoothing
## [1] 2593.762
## 
## $Better_Method
## [1] "Three-Month Moving Average"

Question 3: Linear Trend Regression Approach

#install.packages("readxl")
#install.packages("ggplot2")
library(readxl)
## Warning: 套件 'readxl' 是用 R 版本 4.4.2 來建造的
library(ggplot2)
## Warning: 套件 'ggplot2' 是用 R 版本 4.4.2 來建造的
df <- read_excel(file.choose())

Descriptive statistics

summary(df)
##       Year                         Period      Interest_Rate  
##  Min.   :2000-01-01 00:00:00   Min.   : 1.00   Min.   :2.958  
##  1st Qu.:2005-10-01 18:00:00   1st Qu.: 6.75   1st Qu.:3.966  
##  Median :2011-07-02 12:00:00   Median :12.50   Median :4.863  
##  Mean   :2011-07-02 18:00:00   Mean   :12.50   Mean   :5.084  
##  3rd Qu.:2017-04-02 06:00:00   3rd Qu.:18.25   3rd Qu.:6.105  
##  Max.   :2023-01-01 00:00:00   Max.   :24.00   Max.   :8.053

On average the interest rate of FreddieMac website over 20 years

Construct a time series plot

ggplot(df, aes(x = Period, y = Interest_Rate)) +
  geom_line() + 
  geom_point() + 
  xlab("Period") + 
  ylab("Interest Rate") + 
  ggtitle("Times Series Plot of FreddieMac Website Interest Rate")

Interpretation: The plot shows a fluctuation pattern of first falling and then rising.

Develop a linear trend equation

model <- lm(Interest_Rate ~ Period, data = df)
summary(model)
## 
## Call:
## lm(formula = Interest_Rate ~ Period, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.3622 -0.7212 -0.2823  0.5015  3.1847 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.69541    0.43776  15.295 3.32e-13 ***
## Period      -0.12890    0.03064  -4.207 0.000364 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.039 on 22 degrees of freedom
## Multiple R-squared:  0.4459, Adjusted R-squared:  0.4207 
## F-statistic:  17.7 on 1 and 22 DF,  p-value: 0.0003637

Result - estimated linear trend equation: Interest Rate = 6.70 - 0.13*Period

The R-square is 0.45 (Moderately fits the data)

The overall model is significant as p-value < 0.05