library(tseries)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(forecast)
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(AICcmodavg)

Question 1

Question 1 a.

# Ensures reproducibility
set.seed(123) 

# Number of observations
n <- 10000 

# Variance
sigma_squared <- 2 


white_noise <- rnorm(n, mean = 0, sd = sqrt(sigma_squared))

mean_wn <- mean(white_noise)
var_wn <- var(white_noise)

cat("Mean of the Gaussian White Noise series:", mean_wn, "\n")
## Mean of the Gaussian White Noise series: -0.003354093
cat("Variance of the Gaussian White Noise series:", var_wn, "\n")
## Variance of the Gaussian White Noise series: 1.99455
plot(white_noise, type = "l", main = "Gaussian White Noise Series", ylab = "Value",
     xlab = "Time")

acf(white_noise, lag.max = 20, main = "ACF of the Gaussian White Noise Series")

No significant autocorrelation is visible at any non-zero lag. The spike at lag 0 is always 1 because a series is perfectly correlated with itself; for any non-zero lag h, the value of the series at time t is independent of its value at time t + h.

All three conditions of weak stationarity are fulfilled: 1. The mean is constant over time. 2. The variance is constant over time. 3. The autocovariance between any two points depends only on the lag h between them, not on the time t itself.

Thus the simulated series satisfies weak stationarity.
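A minimal sketch of a formal check, using base R's Box.test: the Ljung-Box statistic jointly tests the first 20 autocorrelations, and a large p-value is consistent with white noise.

# Sketch: Ljung-Box test of joint autocorrelation over the first 20 lags.
Box.test(white_noise, lag = 20, type = "Ljung-Box")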

Question 1 b.

n <- 500
mean <- 0

var_1 <- 1

var_10 <- 10

wn_1 <- rnorm(n, mean = 0, sd = sqrt(var_1))

wn_10 <- rnorm(n, mean = 0, sd = sqrt(var_10))


# Setting up the plotting area to have 2 rows and 1 column
par(mfrow=c(2,1))

# Plotting the first series with variance 1.0
plot(wn_1, type = "l", main = "Gaussian White Noise with Variance 1", ylab = "Value", xlab = "Time")

# Plotting the second series with variance 10.0
plot(wn_10, type = "l", main = "Gaussian White Noise with Variance 10", ylab = "Value", xlab = "Time")

par(mfrow=c(1,1))

The series with variance 10 (wn_10) shows a visibly wider spread than the series with variance 1 (wn_1).
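A quick numerical confirmation (a minimal sketch using the two series simulated above):

# Sample standard deviations: roughly sqrt(1) = 1 and sqrt(10) ≈ 3.16.
sd(wn_1)
sd(wn_10)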

Question 2 (Shumway and Stoffer, Problem 2.3.)

Shumway and Stoffer, Problem 2.3 (a)

par(mfrow=c(2,2), mar=c(2.5,2.5,0,0)+.5, mgp=c(1.6,.6,0)) # set up

for (i in 1:4){
  x = ts(cumsum(rnorm(100, .01, 1))) # data
  regx = lm(x~0+time(x), na.action=NULL) # regression
  plot(x, ylab='Random Walk w Drift') # plots
  abline(a=0, b=.01, col=2, lty=2) # true mean (red - dashed)
  abline(regx, col=4) # fitted line (blue - solid)

}

Shumway and Stoffer, Problem 2.3 (b)

par(mfrow=c(2,2), mar=c(2.5,2.5,0,0)+.5, mgp=c(1.6,.6,0)) # set up

for (i in 1:4){
  
  t <- 1:100  # Time steps
  
  wt <- rnorm(100, 0, 1)
  yt <- .01*t + wt
  
  reg <- lm(yt~t)
  
  # Plot the series
  plot(t, yt, type='l', main=paste("Series", i), ylab="yt", xlab="Time")
    
  # Plot the true mean function
  lines(t, .01 * t, col="red", lwd=2)
    
  # Add the fitted regression line
  abline(reg, col="blue", lwd=2)
    
  # Add a legend
  legend("topleft", legend=c("Data", "True Mean (.01t)", "Fitted Line"), col=c("black", "red", "blue"), lty=1, cex=0.8)
  }

Shumway and Stoffer, Problem 2.3 (c)

The fitted line and the true mean line are noticeably farther apart for the random walk with drift than for the linear trend plus noise model. The random walk also moves much more erratically because the noise accumulates over time, so the series varies far more.

A random walk with drift incorporates dependence between sequential observations, where each value is a step from the previous one plus some noise and a constant drift. In contrast, a linear trend plus noise model represents a deterministic trend with independent noise added at each time step.
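A small sketch illustrating this difference in dependence structure (the seed and the residual-based detrending below are illustrative choices, not part of the problem):

# Sketch: the random walk's accumulated noise produces slowly decaying
# autocorrelation, while the detrended trend-plus-noise series looks like white noise.
set.seed(1)
rw <- ts(cumsum(rnorm(100, .01, 1)))     # random walk with drift
lt <- .01 * (1:100) + rnorm(100, 0, 1)   # linear trend plus noise
par(mfrow = c(1, 2))
acf(rw, main = "ACF: random walk with drift")
acf(residuals(lm(lt ~ seq_along(lt))), main = "ACF: detrended trend + noise")
par(mfrow = c(1, 1))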

Question 3 (Shumway and Stoffer, Problem 2.6.)

Shumway and Stoffer, Problem 2.6 (a)

# Parameters
beta_0 <- 0.5
beta_1 <- 0.2
sigma_w <- 1
t <- 1:100  # Time steps

# Set seed for reproducibility
set.seed(42)

# Generate independent random variables w_t with zero mean and variance sigma_w^2
w_t <- rnorm(n = length(t), mean = 0, sd = sigma_w)

# Generate the process x_t
x_t <- beta_0 + beta_1 * t + w_t


plot(t, x_t, type = 'l', main = "Series xt (Nonstationary)")
abline(lm(x_t ~ t), col = "red")  # Linear trend

acf(x_t, main="ACF of Time Series with Linear Trend")

# Compute means over segments
segment_length <- 20
n <- length(x_t)  # use the length of x_t here (the earlier n refers to a different series)
segment_means <- sapply(seq(1, n, by = segment_length), function(i) {
  mean(x_t[i:min(i+segment_length-1, n)])
})

# Time points for each segment's mean for plotting
segment_times <- seq(1, n, by = segment_length) + segment_length / 2

# Plotting the time series and segment means
plot(t, x_t, type = 'l', main = "Time Series with Linear Trend", ylab = "x_t", xlab = "Time")
points(segment_times, segment_means, col = "red", pch = 16, cex = 1.5)
lines(segment_times, segment_means, col = "red", lwd = 2, type = 'o')

For a stationary time series, the following conditions should be satisfied:

The mean must be constant over time, the variance must be constant over time, and the autocovariance between any two points must depend only on the lag h between them and not on the actual time t.

The fitted linear trend to x_t rises steadily away from any horizontal (constant-mean) line, which indicates nonstationarity: the mean E(x_t) = beta_0 + beta_1*t changes with t, so the constant-mean condition is violated. The variance, by contrast, equals sigma_w^2 at every time point and is constant.

The slowly decaying sample ACF is typical of a trending series: the deterministic trend dominates the estimated autocorrelations even though the noise term w_t is itself uncorrelated.

The third plot makes the same point: the segment means increase steadily from one segment to the next, so the mean is not constant over time.
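As a complementary check, a stationarity test can be applied. Below is a minimal sketch using tseries::kpss.test (the tseries package is loaded at the top); with the level-stationary null hypothesis, a small p-value is expected for this trending series.

# Sketch: KPSS test with null hypothesis of level stationarity.
# Rejection (small p-value) is consistent with the non-constant mean of x_t.
kpss.test(x_t, null = "Level")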

Shumway and Stoffer, Problem 2.6 (b)

The first difference is Δx_t = x_t - x_{t-1}.

Δx_t = (beta_0 + beta_1*t + w_t) - (beta_0 + beta_1*(t-1) + w_{t-1}) = beta_1 + w_t - w_{t-1}

E(Δx_t) = E(beta_1 + w_t - w_{t-1}) = beta_1 + E(w_t) - E(w_{t-1})

Since E(w_t) = E(w_{t-1}) = 0, the mean of the first difference simplifies to beta_1, which is a constant.

Therefore the mean of the first difference is beta_1 and does not depend on time.

# Computing the first difference of x_t
delta_x_t <- diff(x_t)


# Computing the mean of Δx_t as an estimate for β1
mean_delta_x_t <- mean(delta_x_t)

# Print the recalculated mean of Δx_t
cat("Computed Mean of Δx_t:", mean_delta_x_t, "\n")
## Computed Mean of Δx_t: 0.19275
# Compute means over segments
segment_length <- 20
# Correcting n to represent the length of delta_x_t or x_t
n <- length(delta_x_t)  # or use length(t)-1, which should be the same
segment_means <- sapply(seq(1, n, by = segment_length), function(i) {
  mean(delta_x_t[i:min(i+segment_length-1, n)])
})

# Time points for each segment's mean for plotting
segment_times <- seq(1, n, by = segment_length) + segment_length / 2 - 1  # Adjusted for the length of delta_x_t

# Plotting the time series and segment means
plot(t[-1], delta_x_t, type = 'l', main = "First Difference of x_t", ylab = "Δx_t", xlab = "Time")
points(segment_times, segment_means, col = "red", pch = 16, cex = 1.5)
lines(segment_times, segment_means, col = "red", lwd = 2, type = 'o')

acf(delta_x_t)

The mean of the first difference is about 0.19, close to the value beta_1 = 0.2 derived above. The third plot also shows that the segment means are nearly constant across segments.

The ACF of Δx_t shows a pronounced negative spike at lag 1: ignoring the constant, Δx_t = beta_1 + w_t - w_{t-1} behaves like an MA(1) process, whose theoretical lag-1 autocorrelation is -0.5. At all higher lags the autocorrelations are close to zero and within the confidence bounds. None of these quantities depend on t, so the differenced series is stationary even though it is correlated at lag 1.
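A minimal sketch supporting this, using the base stats function ARMAacf: ignoring the constant beta_1, Δx_t is an MA(1) process with coefficient -1.

# Sketch: theoretical ACF of an MA(1) with coefficient -1,
# matching Δx_t = beta_1 + w_t - w_{t-1}; lag 1 equals -0.5 and higher lags are 0.
ARMAacf(ma = -1, lag.max = 5)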

Shumway and Stoffer, Problem 2.6 (c)

Here x_t = beta_0 + beta_1*t + y_t, where y_t is a stationary process with mean E(y_t) = μ_y and autocovariance function γ_y(h).

The first difference is

Δx_t = x_t - x_{t-1} = (beta_0 + beta_1*t + y_t) - (beta_0 + beta_1*(t-1) + y_{t-1}) = beta_1 + y_t - y_{t-1}

E(Δx_t) = E(beta_1) + E(y_t) - E(y_{t-1}) = beta_1 + μ_y - μ_y = beta_1

Since beta_1 is a constant, the mean of Δx_t does not depend on time, so Δx_t is stationary in mean.

For the series Δx_t, the autocovariance function at lag h is

γ_Δx(h) = Cov(Δx_t, Δx_{t+h}) = Cov(beta_1 + y_t - y_{t-1}, beta_1 + y_{t+h} - y_{t+h-1})

The constant beta_1 contributes nothing to the covariance, so

γ_Δx(h) = Cov(y_t - y_{t-1}, y_{t+h} - y_{t+h-1}) = Cov(y_t, y_{t+h}) - Cov(y_t, y_{t+h-1}) - Cov(y_{t-1}, y_{t+h}) + Cov(y_{t-1}, y_{t+h-1})

By stationarity of y_t, each covariance depends only on the lag between its two arguments:

γ_Δx(h) = γ_y(h) - γ_y(h-1) - γ_y(h+1) + γ_y(h) = 2*γ_y(h) - γ_y(h-1) - γ_y(h+1)

For h = 0, using the symmetry γ_y(-1) = γ_y(1),

γ_Δx(0) = 2*γ_y(0) - 2*γ_y(1)

The autocovariance function of Δx_t therefore depends only on the lag h and not on the specific time t, which together with the constant mean confirms that Δx_t is stationary.

set.seed(42) # For reproducibility

# Number of samples
n <- 1000

# Generate a stationary process y_t
mu_y <- 1
sigma_y <- 1
y_t <- rnorm(n, mean = mu_y, sd = sigma_y)

# Generate a non-stationary process x_t = beta_0 + beta_1*t + y_t
beta_0 <- 1
beta_1 <- 0.1
t <- 1:n
x_t <- beta_0 + beta_1 * t + y_t

# Calculate the first difference of x_t
delta_x_t <- diff(x_t)

# Compute the mean of the first difference delta_x_t
mean_delta_x_t <- mean(delta_x_t)
print(mean_delta_x_t)
## [1] 0.09819402
# Function to compute autocovariance for a given lag
autocovariance <- function(series, lag) {
  n <- length(series)
  mean_series <- mean(series)
  autocov <- sum((series[1:(n-lag)] - mean_series) * (series[(lag+1):n] - mean_series)) / n
  return(autocov)
}

# Calculate autocovariances for a range of lags
lags <- 0:19
autocovariances <- sapply(lags, function(lag) autocovariance(delta_x_t, lag))

# Print mean of delta_x_t
print(paste("Mean of Delta x_t:", mean_delta_x_t))
## [1] "Mean of Delta x_t: 0.0981940212254677"
# Print autocovariances
print("Autocovariances for Delta x_t at different lags:")
## [1] "Autocovariances for Delta x_t at different lags:"
print(autocovariances)
##  [1]  2.002577330 -0.973006669 -0.022535578 -0.066277550  0.047118931
##  [6]  0.118783937 -0.127315617 -0.063656344  0.139443148 -0.063522464
## [11]  0.028097011  0.003794129 -0.088465628  0.081955950  0.039621718
## [16] -0.088555715  0.008095074  0.031619061  0.013595120  0.013787055
acf(delta_x_t, main="ACF Plot of First Difference of x_t")

The sample mean of the differenced series (≈ 0.098) is close to beta_1 = 0.1, as derived.

The computed autocovariances also match the theory. Here y_t is simulated as white noise with variance 1, so γ_y(0) = 1 and γ_y(h) = 0 for h ≠ 0; the formula then gives γ_Δx(0) = 2 and γ_Δx(1) = -1, with all higher lags equal to zero. This is what the printed values (≈ 2.00, ≈ -0.97, then values near zero) and the ACF plot show. These quantities depend only on the lag and not on t, so the differenced process is stationary.
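As a further check of the general formula (a sketch, assuming an AR(1) process for y_t so that the autocovariance is non-trivial; the seed, coefficient, and variable names below are illustrative), the sample autocovariances of Δx_t can be compared with 2*γ_y(h) - γ_y(h-1) - γ_y(h+1):

set.seed(7)
n_sim <- 2000
phi   <- 0.6                                           # AR(1) coefficient (illustrative)
y_sim <- arima.sim(model = list(ar = phi), n = n_sim)  # stationary y_t
x_sim <- 1 + 0.1 * (1:n_sim) + y_sim                   # trend plus stationary noise
d_sim <- diff(x_sim)

# Theoretical AR(1) autocovariance with unit innovation variance:
# gamma_y(h) = phi^|h| / (1 - phi^2)
gamma_y <- function(h) phi^abs(h) / (1 - phi^2)
theo <- sapply(0:5, function(h) 2 * gamma_y(h) - gamma_y(h - 1) - gamma_y(h + 1))
emp  <- acf(d_sim, lag.max = 5, type = "covariance", plot = FALSE)$acf[, , 1]
round(rbind(theoretical = theo, empirical = emp), 3)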

Question 4

Question 4.a)

library(car)
## Loading required package: carData
sales <- read.table("/Users/fahadmehfooz/Downloads/skisales.csv",header=TRUE,sep=",")

# Fit a simple linear regression of Sales on PDI
model <- lm(Sales ~ PDI, data = sales)

acf(residuals(model), main = "ACF of Residuals (Sales ~ PDI)")

dw_result <- dwtest(model)
cat("Result for durbin-waston test", "\n")
## Result for durbin-waston test
print(dw_result)
## 
##  Durbin-Watson test
## 
## data:  model
## DW = 1.9684, p-value = 0.3933
## alternative hypothesis: true autocorrelation is greater than 0
 durbinWatsonTest(model, max.lag = 12)
##  lag Autocorrelation D-W Statistic p-value
##    1   -0.0008866674     1.9683944   0.840
##    2   -0.8125024154     3.5599554   0.000
##    3    0.0575221716     1.7540985   0.514
##    4    0.7567478950     0.3213044   0.000
##    5    0.0018187890     1.8157218   0.904
##    6   -0.7117218245     3.1574946   0.000
##    7    0.0260916991     1.6260846   0.694
##    8    0.7338706332     0.1949808   0.000
##    9   -0.0444055276     1.7066345   0.768
##   10   -0.6959877610     2.9730638   0.000
##   11    0.0016593823     1.5281830   0.932
##   12    0.5765261866     0.3244718   0.000
##  Alternative hypothesis: rho[lag] != 0

The Durbin-Watson test at lag 1 gives a p-value of about 0.39 (well above 0.05) and a DW statistic very close to 2, and the ACF plot likewise suggests no significant autocorrelation at lag 1.

For lags 2, 4, 6, 8, 10 and 12, however, the p-values are below 0.05, so we reject the null hypothesis and conclude that significant autocorrelation exists at those lags, consistent with a quarterly seasonal pattern in the residuals.
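As a supplementary check, a minimal sketch using lmtest::bgtest (available since lmtest is loaded above): the Breusch-Godfrey test assesses serial correlation of the residuals jointly up to a chosen order.

# Sketch: Breusch-Godfrey test for serial correlation up to order 4
# in the residuals of the Sales ~ PDI regression.
bgtest(lm(Sales ~ PDI, data = sales), order = 4)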

Question 4.b)

# Creating dummy variables for quarters
Q1 = rep(c(1,0,0,0), 10)
Q2 = rep(c(0,1,0,0), 10)
Q3 = rep(c(0,0,1,0), 10)
Q4 = rep(c(0,0,0,1), 10)


sales$Lag2Sales <- c(rep(NA, 2), head(sales$Sales, -2))
sales$Lag4Sales <- c(rep(NA, 4), head(sales$Sales, -4))
sales$Lag6Sales <- c(rep(NA, 6), head(sales$Sales, -6))
sales$Lag8Sales <- c(rep(NA, 8), head(sales$Sales, -8))
sales$Lag10Sales <- c(rep(NA, 10), head(sales$Sales, -10))
sales$Lag12Sales <- c(rep(NA, 12), head(sales$Sales, -12))


model_aic <- numeric()
model_bic <- numeric()
model_mape <- numeric()


# List of lag variables
lags <- c("Lag2Sales", "Lag4Sales", "Lag6Sales", "Lag8Sales", "Lag10Sales", "Lag12Sales")

# Loop through each lag, fit the model, and store AIC & BIC
for (lag in lags) {
  formula <- as.formula(paste("Sales ~ PDI + Q1 + Q2 + Q3 + Q4 +", lag))
  
  model <- lm(formula, data = sales, na.action = na.exclude)
  
  # Store AIC and BIC for each model
  model_aic[lag] <- AIC(model)
  model_bic[lag] <- BIC(model)
  
  print(paste("Model with", lag))
  print(summary(model))
  
  # Predict sales and calculate MAPE
  predicted_sales <- predict(model, newdata = sales, na.action = na.exclude)
  actual_sales <- sales$Sales[!is.na(predicted_sales)]  
  
  mape <- mean(abs((actual_sales - predicted_sales) / actual_sales), na.rm = TRUE) * 100
  model_mape[lag] <- mape
  
  # Plot ACF of residuals
  acf(na.omit(residuals(model)), main=paste("ACF of residuals for model with", lag))

}
## [1] "Model with Lag2Sales"
## 
## Call:
## lm(formula = formula, data = sales, na.action = na.exclude)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.60666 -0.85370  0.06206  0.52430  2.73770 
## 
## Coefficients: (1 not defined because of singularities)
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13.90902    1.97558   7.040 5.55e-08 ***
## PDI          0.18785    0.03780   4.970 2.17e-05 ***
## Q1           0.34706    0.54893   0.632    0.532    
## Q2          -5.74444    1.10113  -5.217 1.06e-05 ***
## Q3          -5.64429    1.14694  -4.921 2.50e-05 ***
## Q4                NA         NA      NA       NA    
## Lag2Sales    0.06551    0.18668   0.351    0.728    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.193 on 32 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.9715, Adjusted R-squared:  0.9671 
## F-statistic: 218.2 on 5 and 32 DF,  p-value: < 2.2e-16
## Warning in actual_sales - predicted_sales: longer object length is not a
## multiple of shorter object length
## [1] "Model with Lag4Sales"
## 
## Call:
## lm(formula = formula, data = sales, na.action = na.exclude)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8703 -0.7158 -0.1912  0.8171  2.1316 
## 
## Coefficients: (1 not defined because of singularities)
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 20.25494    2.50805   8.076 5.15e-09 ***
## PDI          0.28215    0.03418   8.254 3.26e-09 ***
## Q1           0.44387    0.52587   0.844   0.4053    
## Q2          -7.65316    1.04822  -7.301 3.95e-08 ***
## Q3          -7.42820    1.05540  -7.038 8.00e-08 ***
## Q4                NA         NA      NA       NA    
## Lag4Sales   -0.41933    0.16937  -2.476   0.0192 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.107 on 30 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.9736, Adjusted R-squared:  0.9692 
## F-statistic: 221.1 on 5 and 30 DF,  p-value: < 2.2e-16
## Warning in predict.lm(model, newdata = sales, na.action = na.exclude):
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases

## Warning in predict.lm(model, newdata = sales, na.action = na.exclude): longer
## object length is not a multiple of shorter object length

## [1] "Model with Lag6Sales"
## 
## Call:
## lm(formula = formula, data = sales, na.action = na.exclude)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.34601 -0.74803 -0.08839  0.75926  2.51423 
## 
## Coefficients: (1 not defined because of singularities)
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 14.76163    1.80463   8.180 6.64e-09 ***
## PDI          0.16959    0.03684   4.603 8.20e-05 ***
## Q1           0.49225    0.56980   0.864    0.395    
## Q2          -5.83332    1.10341  -5.287 1.27e-05 ***
## Q3          -5.83936    1.13689  -5.136 1.91e-05 ***
## Q4                NA         NA      NA       NA    
## Lag6Sales    0.12283    0.18192   0.675    0.505    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.171 on 28 degrees of freedom
##   (6 observations deleted due to missingness)
## Multiple R-squared:  0.9674, Adjusted R-squared:  0.9616 
## F-statistic: 166.3 on 5 and 28 DF,  p-value: < 2.2e-16
## Warning in actual_sales - predicted_sales: longer object length is not a
## multiple of shorter object length

## [1] "Model with Lag8Sales"
## 
## Call:
## lm(formula = formula, data = sales, na.action = na.exclude)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4301 -0.8194  0.1123  0.6672  2.3892 
## 
## Coefficients: (1 not defined because of singularities)
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 15.56268    2.52543   6.162 1.62e-06 ***
## PDI          0.17500    0.03804   4.600 9.66e-05 ***
## Q1           0.23670    0.59108   0.400 0.692087    
## Q2          -4.98953    1.16773  -4.273 0.000229 ***
## Q3          -4.93422    1.18511  -4.164 0.000305 ***
## Q4                NA         NA      NA       NA    
## Lag8Sales    0.07687    0.18670   0.412 0.683927    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.173 on 26 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.9646, Adjusted R-squared:  0.9578 
## F-statistic: 141.7 on 5 and 26 DF,  p-value: < 2.2e-16
## Warning in actual_sales - predicted_sales: longer object length is not a
## multiple of shorter object length

## [1] "Model with Lag10Sales"
## 
## Call:
## lm(formula = formula, data = sales, na.action = na.exclude)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.70213 -0.75332  0.08297  0.57778  2.42817 
## 
## Coefficients: (1 not defined because of singularities)
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 18.31675    1.84678   9.918 5.77e-10 ***
## PDI          0.23674    0.03411   6.941 3.54e-07 ***
## Q1           0.23413    0.60525   0.387 0.702291    
## Q2          -4.08997    1.04149  -3.927 0.000633 ***
## Q3          -3.98345    1.09725  -3.630 0.001333 ** 
## Q4                NA         NA      NA       NA    
## Lag10Sales  -0.25393    0.17135  -1.482 0.151365    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.161 on 24 degrees of freedom
##   (10 observations deleted due to missingness)
## Multiple R-squared:  0.9629, Adjusted R-squared:  0.9551 
## F-statistic: 124.5 on 5 and 24 DF,  p-value: 2.358e-16
## Warning in actual_sales - predicted_sales: longer object length is not a
## multiple of shorter object length

## [1] "Model with Lag12Sales"
## 
## Call:
## lm(formula = formula, data = sales, na.action = na.exclude)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.33367 -0.60927 -0.09621  0.81276  2.10331 
## 
## Coefficients: (1 not defined because of singularities)
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 22.52662    2.36995   9.505 3.01e-09 ***
## PDI          0.25003    0.02975   8.405 2.58e-08 ***
## Q1           0.10884    0.56874   0.191   0.8500    
## Q2          -7.62107    1.00941  -7.550 1.52e-07 ***
## Q3          -7.53894    1.02989  -7.320 2.50e-07 ***
## Q4                NA         NA      NA       NA    
## Lag12Sales  -0.37525    0.15402  -2.436   0.0234 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.056 on 22 degrees of freedom
##   (12 observations deleted due to missingness)
## Multiple R-squared:  0.9655, Adjusted R-squared:  0.9577 
## F-statistic: 123.3 on 5 and 22 DF,  p-value: 2.494e-15
## Warning in actual_sales - predicted_sales: longer object length is not a
## multiple of shorter object length

# Print AIC and BIC values
print("AIC values for models:")
## [1] "AIC values for models:"
print(model_aic)
##  Lag2Sales  Lag4Sales  Lag6Sales  Lag8Sales Lag10Sales Lag12Sales 
##  128.70695  116.92559  114.60651  108.38456  101.38040   89.78115
print("BIC values for models:")
## [1] "BIC values for models:"
print(model_bic)
##  Lag2Sales  Lag4Sales  Lag6Sales  Lag8Sales Lag10Sales Lag12Sales 
##  140.17005  128.01022  125.29104  118.64471  111.18879   99.10658
print("MAPE values for models:")
## [1] "MAPE values for models:"
print(model_mape)
##  Lag2Sales  Lag4Sales  Lag6Sales  Lag8Sales Lag10Sales Lag12Sales 
##  14.155423   7.630914  15.651068  12.610771  16.645811  16.159516

The downward trend in AIC and BIC implies that incorporating higher-order lags explains more of the variance in the sales data, potentially capturing patterns such as seasonality that are not accounted for by lower-order lags or the other predictors.

However, this also raises the risk of overfitting, and each additional lag removes more observations from the fit, so the information criteria are not computed on identical samples. Although AIC and BIC are best for the Lag12Sales model, the Lag4Sales model has the lowest MAPE, and the ACF of its residuals shows all lags inside the confidence bands, suggesting no remaining autocorrelation.

We therefore choose the Lag4Sales model; a common-sample refit, sketched below, would also make the information criteria directly comparable.
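The sketch refits each lag model on the common subset of rows where every lag column is available, so the AIC values are based on the same sample (the objects used below were created earlier in this part):

# Sketch: add the quarterly dummies as columns so subsetting keeps them aligned,
# then refit each lag model on the rows where all lag columns are observed.
sales$Q1 <- Q1; sales$Q2 <- Q2; sales$Q3 <- Q3
common <- complete.cases(sales[, lags])
aic_common <- sapply(lags, function(lag) {
  f <- as.formula(paste("Sales ~ PDI + Q1 + Q2 + Q3 +", lag))
  AIC(lm(f, data = sales[common, ]))
})
aic_common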

Question 5

load("/Users/fahadmehfooz/Downloads/tsa3.rda")
Yt <- jj
Xt <- log(Yt)  # Log transform

Yt_ts <- ts(as.vector(t(Yt)), start = c(1960, 1), frequency = 4)

training_data <- Xt[1:80]
test_data <- Xt[81:84]

training_data_original <- Yt_ts[1:80]  # Original data for M4
t <- 1:80  # Time index for the training data

# Quarterly indicators for M1
Q1 <- ifelse((1:80) %% 4 == 1, 1, 0)
Q2 <- ifelse((1:80) %% 4 == 2, 1, 0)
Q3 <- ifelse((1:80) %% 4 == 3, 1, 0)

# M1: Regression with linear trend and quarters
model_M1 <- lm(training_data ~ t + Q1 + Q2 + Q3)

# M2: Regression with linear trend and trigonometric functions
omega <- 2 * pi / 4
model_M2 <- lm(training_data ~ t + sin(omega * t) + cos(omega * t))

# M3: Additive Holt-Winters on log-transformed data
hw_additive <- HoltWinters(ts(training_data, start = c(1960, 1), frequency = 4), seasonal = "additive")

# M4: Multiplicative Holt-Winters on original data
hw_multiplicative <- HoltWinters(ts(training_data_original, start = c(1960, 1), frequency = 4), seasonal = "multiplicative")

# Forecasting
forecasts_M1 <- predict(model_M1, newdata = data.frame(t = 81:84, Q1 = c(1,0,0,0), Q2 = c(0,1,0,0), Q3 = c(0,0,1,0)))
forecasts_M2 <- predict(model_M2, newdata = data.frame(t = 81:84))

forecasts_M3 <- forecast(hw_additive, h = 4)
forecasts_M4 <- forecast(hw_multiplicative, h = 4)



cat("Forecast of M1\n" )
## Forecast of M1
cat(forecasts_M1)
## 2.738406 2.816778 2.929872 2.707658
cat("\n")
cat("Forecast of M2\n" )
## Forecast of M2
cat(forecasts_M2)
## 2.679123 2.871558 2.870589 2.762438
cat("\n")
cat("Forecast of M3\nn" )
## Forecast of M3
## n
print(forecasts_M3)
##         Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 1980 Q1       2.769067 2.658066 2.880067 2.599306 2.938827
## 1980 Q2       2.725993 2.610825 2.841161 2.549858 2.902127
## 1980 Q3       2.808784 2.688547 2.929021 2.624898 2.992670
## 1980 Q4       2.458894 2.332679 2.585108 2.265865 2.651923
cat("\n")
cat("Forecast of M4\n" )
## Forecast of M4
print(forecasts_M4)
##         Point Forecast     Lo 80    Hi 80     Lo 95    Hi 95
## 1980 Q1       15.83356 15.363885 16.30324 15.115253 16.55187
## 1980 Q2       14.83563 14.360164 15.31109 14.108470 15.56278
## 1980 Q3       16.49996 16.008691 16.99122 15.748632 17.25128
## 1980 Q4       11.25638  9.745616 12.76714  8.945867 13.56689
cat("\n")
# Actual values for MAPE calculation
actuals_log_transformed <- exp(test_data)  # Convert back from log for M1, M2, M3
actuals_original <- Yt_ts[81:84]  # Original, non-log-transformed data for M4


# Adjusted R-squared, AIC, and BIC for M1
summary(model_M1)$adj.r.squared
## [1] 0.9844282
# Adjusted R-squared, AIC, and BIC for M2
summary(model_M2)$adj.r.squared
## [1] 0.981142
# Calculating MAPE for M1
mape_M1 <- mean(abs((actuals_log_transformed - exp(forecasts_M1)) / actuals_log_transformed)) * 100

# Calculating MAPE for M2
mape_M2 <- mean(abs((actuals_log_transformed - exp(forecasts_M2)) / actuals_log_transformed)) * 100

# Calculating MAPE for M3
mape_M3 <- mean(abs((actuals_log_transformed - exp(forecasts_M3$mean)) / actuals_log_transformed)) * 100

# Calculating MAPE for M4
mape_M4 <- mean(abs((actuals_original - forecasts_M4$mean) / actuals_original)) * 100


residuals_m3 <- residuals(hw_additive)
squared_residuals_m3 <- residuals_m3^2

residuals_m4 <- residuals(hw_multiplicative)
squared_residuals_m4 <- residuals_m4^2

# For Gaussian noise, -n/2 * log(2*pi*sigma^2) - 1/(2*sigma^2) * sum(squared residuals)
n_m3 <- length(residuals_m3)  # number of observations
sigma_squared_m3 <- sum(squared_residuals_m3) / n_m3  # variance of residuals
log_likelihood_approx_m3 <- -n_m3/2 * log(2*pi*sigma_squared_m3) - 1/(2*sigma_squared_m3) * sum(squared_residuals_m3)

n_m4 <- length(residuals_m4)  # number of observations
sigma_squared_m4 <- sum(squared_residuals_m4) / n_m4  # variance of residuals
log_likelihood_approx_m4 <- -n_m4/2 * log(2*pi*sigma_squared_m4) - 1/(2*sigma_squared_m4) * sum(squared_residuals_m4)


# Number of parameters (k) 
k <- 6  

# AIC and BIC calculation
AIC_m3 <- 2*k - 2*log_likelihood_approx_m3
BIC_m3 <- log(n_m3)*k - 2*log_likelihood_approx_m3

AIC_m4 <- 2*k - 2*log_likelihood_approx_m4
BIC_m4 <- log(n_m4)*k - 2*log_likelihood_approx_m4

cat("Adjusted R-squared for M1:", summary(model_M1)$adj.r.squared, "\n")
## Adjusted R-squared for M1: 0.9844282
cat("AIC for M1:", AIC(model_M1), "\n")
## AIC for M1: -100.6372
cat("BIC for M1:", BIC(model_M1), "\n\n")
## BIC for M1: -86.34505
cat("Adjusted R-squared for M2:", summary(model_M2)$adj.r.squared, "\n")
## Adjusted R-squared for M2: 0.981142
cat("AIC for M2:", AIC(model_M2), "\n")
## AIC for M2: -86.25928
cat("BIC for M2:", BIC(model_M2), "\n\n")
## BIC for M2: -74.34914
cat("AIC for M3 (Additive Holt-Winters):", AIC_m3, "\n")
## AIC for M3 (Additive Holt-Winters): -144.6597
cat("BIC for M3 (Additive Holt-Winters):", BIC_m3, "\n\n")
## BIC for M3 (Additive Holt-Winters): -130.6753
cat("AIC for M4 (Multiplicative Holt-Winters):", AIC_m4, "\n")
## AIC for M4 (Multiplicative Holt-Winters): -144.6597
cat("BIC for M4 (Multiplicative Holt-Winters):", BIC_m4, "\n\n")
## BIC for M4 (Multiplicative Holt-Winters): -130.6753
cat("MAPE for M1:", mape_M1, "\n")
## MAPE for M1: 16.14556
cat("MAPE for M2:", mape_M2, "\n")
## MAPE for M2: 19.25979
cat("MAPE for M3:", mape_M3, "\n")
## MAPE for M3: 2.486006
cat("MAPE for M4:", mape_M4, "\n\n")
## MAPE for M4: 2.358198
# RMSE Calculation for M1
rmse_M1 <- sqrt(mean((actuals_log_transformed - exp(forecasts_M1))^2))

# RMSE Calculation for M2
rmse_M2 <- sqrt(mean((actuals_log_transformed - exp(forecasts_M2))^2))

# RMSE Calculation for M3
rmse_M3 <- sqrt(mean((actuals_log_transformed - exp(forecasts_M3$mean))^2))

# RMSE Calculation for M4
rmse_M4 <- sqrt(mean((actuals_original - forecasts_M4$mean)^2))

# Print RMSE Results
cat("RMSE for M1:", rmse_M1, "\n")
## RMSE for M1: 2.425337
cat("RMSE for M2:", rmse_M2, "\n")
## RMSE for M2: 2.834807
cat("RMSE for M3:", rmse_M3, "\n")
## RMSE for M3: 0.4355595
cat("RMSE for M4:", rmse_M4, "\n")
## RMSE for M4: 0.3595534
# M1: ACF Plot for Linear Regression with Quarters
residuals_M1 <- residuals(model_M1)
Acf(residuals_M1, main="ACF of Residuals for M1")

# M2: ACF Plot for Linear Regression with Trigonometric Seasonality
residuals_M2 <- residuals(model_M2)
Acf(residuals_M2, main="ACF of Residuals for M2")

# M3: ACF Plot for Additive Holt-Winters
Acf(residuals(hw_additive), main="ACF of Residuals for M3")

# M4: ACF Plot for Multiplicative Holt-Winters
Acf(residuals(hw_multiplicative), main="ACF of Residuals for M4")

On the basis of RMSE and MAPE, which are lowest for M4, we choose that model. Order of fit: M4 > M3 > M1 > M2.

Based on the ACF of its residuals, M3 shows no significant autocorrelation at any lag.

AIC and BIC are not good metrics for this comparison: M1-M3 are fit to the log-transformed series while M4 is fit to the original series, and the Holt-Winters likelihoods used here are only Gaussian approximations computed from the residuals, so the values are not directly comparable across models.

M4 is the preferred model in terms of predictive accuracy.
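For completeness, the test-set error measures could also be obtained with forecast::accuracy (the forecast package is loaded above); a minimal sketch using the objects defined in this section:

# Sketch: accuracy() reports RMSE, MAPE, and related measures for the training
# fit and the 4-quarter test set in a single table.
accuracy(forecasts_M4, actuals_original)   # M4: errors on the original scale
accuracy(forecasts_M3, test_data)          # M3: errors on the log scale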