ID: “20228034”
Section: “A”
Load Libraries
library(fma) # dataset
## Warning: package 'fma' was built under R version 4.3.3
## Loading required package: forecast
## Warning: package 'forecast' was built under R version 4.3.2
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(zoo) # na.approx
## Warning: package 'zoo' was built under R version 4.3.2
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(tseries) # statistical tests
## Warning: package 'tseries' was built under R version 4.3.2
library(forecast) # auto.arima and forecasting accuracy measures
options(warn=0)
Select the R data set with my ID 20228034
34 %% 6 # last two digits of the ID, modulo 6
## [1] 4
The result is 4, which means the AirPassengers data set is assigned to me for this assignment.
Load the AirPassengers data set
data(AirPassengers)
#Check the missing values in the entire dataset
missing_values <- sum(is.na(AirPassengers))
cat("Number of missing values:", missing_values, "\n")
## Number of missing values: 0
There are no missing values in this data set. If the data set did have missing values, they could be handled by linear interpolation using na.approx(AirPassengers).
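As a quick illustration on a throwaway copy (x_demo is a hypothetical name), na.approx() fills an artificially inserted gap by linear interpolation:
# Demonstrate linear interpolation on a copy with one artificial gap
x_demo <- AirPassengers
x_demo[5] <- NA # blank out May 1949
zoo::na.approx(x_demo)[5] # refilled as the average of its two neighbours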
AirPassengers <- na.approx(AirPassengers)
AirPassengers
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1949 112 118 132 129 121 135 148 148 136 119 104 118
## 1950 115 126 141 135 125 149 170 170 158 133 114 140
## 1951 145 150 178 163 172 178 199 199 184 162 146 166
## 1952 171 180 193 181 183 218 230 242 209 191 172 194
## 1953 196 196 236 235 229 243 264 272 237 211 180 201
## 1954 204 188 235 227 234 264 302 293 259 229 203 229
## 1955 242 233 267 269 270 315 364 347 312 274 237 278
## 1956 284 277 317 313 318 374 413 405 355 306 271 306
## 1957 315 301 356 348 355 422 465 467 404 347 305 336
## 1958 340 318 362 348 363 435 491 505 404 359 310 337
## 1959 360 342 406 396 420 472 548 559 463 407 362 405
## 1960 417 391 419 461 472 535 622 606 508 461 390 432
Finding outliers
outliers <- boxplot.stats(AirPassengers)$out
# Plot the AirPassengers data
plot(AirPassengers, main="AirPassengers Data with Outliers Highlighted",
xlab="Time", ylab="Number of Passengers", pch=20, type='o', col="blue")
# Highlight any outliers with a different color (map their indices onto the time axis)
points(time(AirPassengers)[AirPassengers %in% outliers],
AirPassengers[AirPassengers %in% outliers], col="red", pch=20)
# Add a legend
legend("topright", legend=c("Outliers", "Regular observations"),
col=c("red", "blue"), pch=20)
No outliers are flagged by the box-plot statistics. As a precaution the tails are still winsorized below, and summary() is used to see the resulting distribution of the data set.
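boxplot.stats() flags points beyond 1.5 times the interquartile range from the quartile hinges; roughly the same rule can be written out explicitly (a sketch using sample quartiles, which differ slightly from hinges):
# 1.5 * IQR rule, written out with sample quartiles
q <- quantile(AirPassengers, c(0.25, 0.75))
iqr <- q[2] - q[1]
sum(AirPassengers < q[1] - 1.5 * iqr | AirPassengers > q[2] + 1.5 * iqr) # count of flagged points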
# Outlier handling: winsorize by capping values at the 5th and 95th percentiles
suppressWarnings({
q_05 <- quantile(AirPassengers, 0.05)
q_95 <- quantile(AirPassengers, 0.95)
AirPassengers[AirPassengers < q_05] <- q_05
AirPassengers[AirPassengers > q_95] <- q_95
summary(AirPassengers)
})
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 121.6 180.0 265.5 277.4 360.5 488.1
# Plot the modified data
plot(AirPassengers, main="Modified AirPassengers Data",
xlab="Time", ylab="Number of Passengers", pch=20, type='o', col="green")
Plot the time series
plot(AirPassengers, main = "Time Plot of AirPassengers data", ylab = "Value", xlab = "Time")
This time series plot displays a rising trend in air passenger numbers
from 1949 to 1960, with evident seasonal fluctuations. The variability
also increases over time, suggesting a growing market for air travel
and underscoring the need for reliable forecasting methods to
anticipate future demand.
acf(AirPassengers, main = "ACF Plot of Air Passenger", ylab = "Autocorrelation", xlab = "Lag")
The ACF plot shows significant autocorrelation at every displayed lag.
At lag 0, the autocorrelation is 1, since the series is perfectly
correlated with itself. As the lag increases, the autocorrelation
decays only slowly, yet all lags remain significant, indicating strong
persistence in the values over time. The pronounced spike at lag 12
reflects the monthly seasonality of the data. Because every lag crosses
the confidence bounds and the decay is slow and trend-like, the series
is not stationary.
monthplot(AirPassengers, main = "Seasonal-Subseries Plot of Air Passenger", ylab = "Value", xlab = "Month")
The seasonal subseries plot shows a general upward trend within each month across the years, with variation between months indicating seasonal patterns. Some months, like January and August, show higher variability, whereas others, like February and November, appear more consistent. The plot does not reveal extreme outliers, suggesting a relatively stable seasonal pattern over the years observed.
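A complementary view is a seasonal plot with one line per year (a sketch using forecast::seasonplot), which makes the within-year shape and its year-on-year growth easier to compare:
# One line per year: within-year seasonal shape and year-on-year growth
seasonplot(AirPassengers, year.labels = TRUE,
main = "Seasonal Plot of Air Passenger", ylab = "Value", xlab = "Month")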
#Set the seed for reproducibility
set.seed(20228034)
train_size <- ceiling(0.7 * length(AirPassengers)) # 70% training size
# Calculate the year and month for the end of training data
end_year <- 1949 + (floor((train_size - 1) / 12))
end_month <- ((train_size - 1) %% 12) + 1
suppressWarnings(train_data <- window(AirPassengers, start = c(1949, 1), end = c(end_year, end_month))) #train data
# Calculate the year and month for the start of test data
start_year <- 1949 + (floor(train_size / 12))
start_month <- (train_size %% 12) + 1
suppressWarnings(test_data <- window(AirPassengers, start = c(start_year, start_month))) #test data
# Print the training and test set
print(train_data)
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1949 121.6 121.6 132.0 129.0 121.6 135.0 148.0 148.0 136.0 121.6 121.6 121.6
## 1950 121.6 126.0 141.0 135.0 125.0 149.0 170.0 170.0 158.0 133.0 121.6 140.0
## 1951 145.0 150.0 178.0 163.0 172.0 178.0 199.0 199.0 184.0 162.0 146.0 166.0
## 1952 171.0 180.0 193.0 181.0 183.0 218.0 230.0 242.0 209.0 191.0 172.0 194.0
## 1953 196.0 196.0 236.0 235.0 229.0 243.0 264.0 272.0 237.0 211.0 180.0 201.0
## 1954 204.0 188.0 235.0 227.0 234.0 264.0 302.0 293.0 259.0 229.0 203.0 229.0
## 1955 242.0 233.0 267.0 269.0 270.0 315.0 364.0 347.0 312.0 274.0 237.0 278.0
## 1956 284.0 277.0 317.0 313.0 318.0 374.0 413.0 405.0 355.0 306.0 271.0 306.0
## 1957 315.0 301.0 356.0 348.0 355.0
print(test_data)
##         Jan    Feb    Mar    Apr    May    Jun    Jul    Aug    Sep    Oct
## 1957                                     422.00 465.00 467.00 404.00 347.00
## 1958 340.00 318.00 362.00 348.00 363.00 435.00 488.15 488.15 404.00 359.00
## 1959 360.00 342.00 406.00 396.00 420.00 472.00 488.15 488.15 463.00 407.00
## 1960 417.00 391.00 419.00 461.00 472.00 488.15 488.15 488.15 488.15 461.00
##         Nov    Dec
## 1957 305.00 336.00
## 1958 310.00 337.00
## 1959 362.00 405.00
## 1960 390.00 432.00
I set the seed for reproducibility using set.seed(20228034). The training set (train_data) contains the first 70% of the observations; the test set (test_data) contains the remaining 30%, starting from the observation immediately after the training set.
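The same split can be written more compactly with the series' own time index (a sketch; train_alt and test_alt are hypothetical names, reusing train_size from above):
# Equivalent split using the fractional-year time index of the ts object
split_time <- time(AirPassengers)[train_size] # time of the last training point
train_alt <- window(AirPassengers, end = split_time)
test_alt <- window(AirPassengers, start = split_time + 1/12)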
adf_result <- adf.test(train_data)
## Warning in adf.test(train_data): p-value smaller than printed p-value
kpss_result <- kpss.test(train_data)
## Warning in kpss.test(train_data): p-value smaller than printed p-value
print("ADF Result ")
## [1] "ADF Result "
print(adf_result$statistic)
## Dickey-Fuller
## -4.623752
print(adf_result$p.value)
## [1] 0.01
print("KPSS Result ")
## [1] "KPSS Result "
print(kpss_result$statistic)
## KPSS Level
## 1.945694
print(kpss_result$p.value)
## [1] 0.01
For the ADF test (H0: non-stationary):
The test statistic is -4.6238 and the p-value is 0.01. Since the p-value is smaller than the 0.05 significance level, we reject the null hypothesis. This suggests that the series is stationary.
For the KPSS test (H0: stationary):
The null hypothesis of the KPSS test is that the series is level-stationary. The test statistic is 1.9457 and the p-value is 0.01. Since the p-value is less than 0.05, we reject the null hypothesis. This is evidence of non-stationarity in the series.
Overall,
The ADF test suggests stationarity, while the KPSS test suggests non-stationarity. The two tests pose opposite null hypotheses: the ADF null is a unit root (non-stationarity), while the KPSS null is stationarity, so conflicting results like these are common for trending, seasonal series and call for further analysis.
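One way to adjudicate the conflict is to let the data suggest the differencing orders directly (a minimal cross-check using forecast::ndiffs and forecast::nsdiffs, assuming train_data from above):
# Estimated number of differences required for stationarity
nsdiffs(train_data) # seasonal differences (lag 12)
ndiffs(train_data) # non-seasonal differences (KPSS-based by default)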
Next steps: given the conflicting results, it is advisable to difference the series and then reassess stationarity. Let's proceed with differencing.
diff_seasonal_train_data <- diff(train_data, lag = 12) # seasonal difference (lag 12)
diff_seasonal_nonseasonal_train_data <- diff(diff_seasonal_train_data) # first difference
Since differencing has been applied, the next step is to examine the autocorrelation function (ACF) and partial autocorrelation function (PACF) plots of the differenced series. These plots help identify the orders of the ARIMA model (p, d, q).
acf(diff_seasonal_nonseasonal_train_data, main="ACF of Differenced Series")
pacf(diff_seasonal_nonseasonal_train_data, main="PACF of Differenced Series")
adf_result <- adf.test(diff_seasonal_nonseasonal_train_data)
## Warning in adf.test(diff_seasonal_nonseasonal_train_data): p-value smaller than
## printed p-value
kpss_result <- kpss.test(diff_seasonal_nonseasonal_train_data)
## Warning in kpss.test(diff_seasonal_nonseasonal_train_data): p-value greater
## than printed p-value
cat("ADF Test:\n", "Test Statistic:", adf_result$statistic, "\n", "p-value:", adf_result$p.value, "\n\n")
## ADF Test:
## Test Statistic: -4.909165
## p-value: 0.01
cat("KPSS Test:\n", "Test Statistic:", kpss_result$statistic, "\n", "p-value:", kpss_result$p.value, "\n")
## KPSS Test:
## Test Statistic: 0.05344536
## p-value: 0.1
After applying differencing:
ADF test (H0: non-stationary): the p-value of 0.01 is less than 0.05, so we reject the null hypothesis. The data are stationary.
KPSS test (H0: stationary): the p-value of 0.1 is greater than 0.05, so we fail to reject the null hypothesis. The data are stationary.
Fit the ARIMA(2,1,1) model
M1 <- arima(diff_seasonal_nonseasonal_train_data, order = c(2,1,1))
print(summary(M1))
##
## Call:
## arima(x = diff_seasonal_nonseasonal_train_data, order = c(2, 1, 1))
##
## Coefficients:
## ar1 ar2 ma1
## -0.2404 0.0438 -1.0000
## s.e. 0.1074 0.1070 0.0329
##
## sigma^2 estimated as 97.31: log likelihood = -325.04, aic = 658.07
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.6976218 9.808133 7.691337 -Inf Inf 0.5959622 -0.007818179
#Extract AIC from the summary
aic <- summary(M1)$aic
Calculate the AICc correction
df <- 2 # parameter count used in the AICc correction term
aicc <- aic + (2 * df * (df + 1)) / (length(diff_seasonal_nonseasonal_train_data) - df - 1) # AICc
cat("AICc:", aicc, "\n") # print AICc
## AICc: 658.2122
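The correction term comes from AICc = AIC + 2k(k+1)/(n - k - 1), where k is the parameter count and n the sample size. A small helper makes it reusable (a sketch; aicc_from_arima is a hypothetical name, with k = 2 matching the df used above):
# Reusable AICc: AIC + 2k(k+1) / (n - k - 1)
aicc_from_arima <- function(fit, k, n) {
fit$aic + (2 * k * (k + 1)) / (n - k - 1)
}
aicc_from_arima(M1, k = 2, n = length(diff_seasonal_nonseasonal_train_data))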
Residuals diagnostics
checkresiduals(M1)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,1,1)
## Q* = 17.117, df = 15, p-value = 0.3119
##
## Model df: 3. Total lags used: 18
The p-value of 0.3119 exceeds the typical significance level of 0.05. Therefore, we fail to reject the null hypothesis that the residuals are independent. This suggests that there is no significant autocorrelation in the residuals, indicating that the model adequately captures the temporal dependencies in the data.
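The same Ljung-Box statistic can be reproduced directly with base R's Box.test (a minimal sketch; fitdf = 3 matches the model's three estimated coefficients and lag = 18 the total lags used above):
# Ljung-Box test on the residuals, adjusting df for the 3 fitted coefficients
Box.test(residuals(M1), lag = 18, type = "Ljung-Box", fitdf = 3)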
To stabilize the increasing variance, apply the cube root transformation:
cube_root_transformed <- sign(train_data) * abs(train_data)^(1/3)
acf(cube_root_transformed, main="ACF of Cube Root Transformed Series")
pacf(cube_root_transformed, main="PACF of Cube Root Transformed Series")
adf_result <- adf.test(cube_root_transformed)
## Warning in adf.test(cube_root_transformed): p-value smaller than printed
## p-value
kpss_result <- kpss.test(cube_root_transformed)
## Warning in kpss.test(cube_root_transformed): p-value smaller than printed
## p-value
cat("ADF Test:\n","Test Statistic:", adf_result$statistic,"\n","p-value:", adf_result$p.value,"\n\n")
## ADF Test:
## Test Statistic: -5.046206
## p-value: 0.01
cat("KPSS Test:\n","Test Statistic:", kpss_result$statistic,"\n","p-value:", kpss_result$p.value,"\n")
## KPSS Test:
## Test Statistic: 1.989008
## p-value: 0.01
The ADF test statistic is beyond the critical values, giving strong evidence against the null hypothesis of non-stationarity. The p-value of 0.01 supports this: we reject the null hypothesis in favor of the alternative, implying stationarity of the cube-root-transformed series.
The KPSS test statistic exceeds the critical values, giving evidence against the null hypothesis of stationarity. The p-value of 0.01 supports rejecting the null hypothesis, indicating non-stationarity of the cube-root-transformed series. The two tests again disagree.
if (adf_result$p.value >= 0.05) {
diff_train_data <- diff(cube_root_transformed)
cat("Differencing is applied.\n")
} else {
diff_train_data <- cube_root_transformed
cat("No differencing is needed.\n")
}
## No differencing is needed.
Now fit the ARIMA(0,1,2) model
M2 <- arima(diff_train_data, order = c(0,1,2))
print(summary(M2))
##
## Call:
## arima(x = diff_train_data, order = c(0, 1, 2))
##
## Coefficients:
## ma1 ma2
## 0.2079 -0.3647
## s.e. 0.1203 0.1692
##
## sigma^2 estimated as 0.0354: log likelihood = 24.96, aic = -43.91
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.02510888 0.1872199 0.155632 0.3658239 2.579018 0.9661107
## ACF1
## Training set 0.006410136
Residuals Diagnostics
checkresiduals(M2)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,2)
## Q* = 112.21, df = 18, p-value = 1.221e-15
##
## Model df: 2. Total lags used: 20
The Ljung-Box p-value of 1.221e-15 is far below 0.05, so we reject the null hypothesis of independent residuals: the ARIMA(0,1,2) model leaves significant autocorrelation at several lags. Refinement of the model, or consideration of alternative models, is needed to better capture the underlying patterns in the data.
The Box-Cox transformation is used to stabilize the variance of a time series. The optimal lambda parameter is chosen to maximize the log-likelihood of the transformed data. After applying the Box-Cox transformation, we'll take suitable differences to obtain a stationary series.
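Since the forecasts will eventually need to be returned to the original scale, note that forecast::InvBoxCox is the inverse of forecast::BoxCox (a minimal round-trip sketch with arbitrary demo values and lambda):
# InvBoxCox() undoes BoxCox() for the same lambda
x_demo <- c(100, 200, 400) # arbitrary demo values
lambda_demo <- 0.5
all.equal(as.numeric(forecast::InvBoxCox(forecast::BoxCox(x_demo, lambda_demo), lambda_demo)), x_demo)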
Apply Box-Cox Transformation
optimal_lambda <- BoxCox.lambda(train_data)
cat("Optimal Lambda:", optimal_lambda,"\n")
## Optimal Lambda: -0.3526064
Transform the series using the optimal lambda
boxcox_transformed <- forecast::BoxCox(train_data, lambda = optimal_lambda)
Plot ACF and PACF for boxcox transformed data
acf(boxcox_transformed, main="ACF of box-cox Transformed Series")
pacf(boxcox_transformed, main="PACF of box-cox Transformed Series")
Check if differencing is needed based on test results
adf_result_boxcox <- adf.test(boxcox_transformed)
## Warning in adf.test(boxcox_transformed): p-value smaller than printed p-value
print(adf_result_boxcox)
##
## Augmented Dickey-Fuller Test
##
## data: boxcox_transformed
## Dickey-Fuller = -4.6403, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
kpss_result_boxcox <- kpss.test(boxcox_transformed)
## Warning in kpss.test(boxcox_transformed): p-value smaller than printed p-value
print(kpss_result_boxcox)
##
## KPSS Test for Level Stationarity
##
## data: boxcox_transformed
## KPSS Level = 1.9971, Truncation lag parameter = 4, p-value = 0.01
The ADF test statistic is beyond the critical values, suggesting strong evidence against the null hypothesis of non-stationarity. The p-value of 0.01 further supports this, indicating that we reject the null hypothesis in favor of the alternative, implying stationarity in the transformed data.
The KPSS test statistic is higher than the critical values, indicating evidence against the null hypothesis of stationarity. The p-value of 0.01 further supports this, suggesting rejection of the null hypothesis and indicating non-stationarity in the transformed data.
if (adf_result_boxcox$p.value >= 0.05 | kpss_result_boxcox$p.value < 0.05) { # non-stationary by either test
# Differencing is needed
diff_boxcox_transformed <- diff(boxcox_transformed)
cat("Differencing is applied.\n")
} else {
# No differencing needed
diff_boxcox_transformed <- boxcox_transformed
cat("No differencing is needed.\n")
}
## Differencing is applied.
Choose an appropriate ARIMA model
M3 <- arima(diff_boxcox_transformed, order = c(0,0,2))
print(summary(M3))
##
## Call:
## arima(x = diff_boxcox_transformed, order = c(0, 0, 2))
##
## Coefficients:
## ma1 ma2 intercept
## -0.1869 -0.8131 0.0016
## s.e. 0.0726 0.0696 0.0001
##
## sigma^2 estimated as 0.0001706: log likelihood = 289.67, aic = -571.34
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.0004409279 0.01305976 0.01105459 NaN Inf 0.7141774 0.1683811
checkresiduals(M3)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,0,2) with non-zero mean
## Q* = 137.35, df = 18, p-value < 2.2e-16
##
## Model df: 2. Total lags used: 20
The Ljung-Box p-value for M3 (< 2.2e-16) again signals significant residual autocorrelation, so next we let auto.arima search for a better specification.
Fit M4 using auto.arima
M4 <- auto.arima(train_data)
print("Auto ARIMA Model:")
## [1] "Auto ARIMA Model:"
print(M4)
## Series: train_data
## ARIMA(1,1,0)(1,1,0)[12]
##
## Coefficients:
## ar1 sar1
## -0.2367 -0.1832
## s.e. 0.1039 0.1093
##
## sigma^2 = 95.33: log likelihood = -324.61
## AIC=655.23 AICc=655.51 BIC=662.66
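auto.arima uses a stepwise search by default, which can skip over candidate models; a fuller but slower search is available by disabling the shortcuts (a sketch; M4_full is a hypothetical name):
# Exhaustive search over the model space, without stepwise shortcuts
M4_full <- auto.arima(train_data, stepwise = FALSE, approximation = FALSE)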
Calculate AIC, AICc, and BIC for M1, M2, M3, and M4
# Define the models
models <- list(
M1 = list(AIC = 658.07, AICc = NA, BIC = NA),
M2 = list(AIC = -43.91, AICc = NA, BIC = NA),
M3 = list(AIC = -571.34, AICc = NA, BIC = NA),
M4 = list(AIC = 655.23, AICc = 655.51, BIC = 662.66)
)
# Define the criteria
criteria <- c("AIC", "AICc", "BIC")
# Create a data frame to store the criteria values for each model
df <- data.frame(Model = names(models), stringsAsFactors = FALSE)
# Fill the data frame with the criteria values
for (crit in criteria) {
df[[crit]] <- sapply(models, function(model) model[[crit]])
}
# Print the table
print(df)
## Model AIC AICc BIC
## 1 M1 658.07 NA NA
## 2 M2 -43.91 NA NA
## 3 M3 -571.34 NA NA
## 4 M4 655.23 655.51 662.66
Lower values of AIC, AICc, and BIC indicate a better fit, but the criteria are only comparable between models fitted to the same response. M2 (cube-root scale) and M3 (Box-Cox scale) have lower AICs simply because their transformed series have far smaller variance, not because they fit better. Among the models fitted to comparable series, M4 has the lowest criteria, and the out-of-sample accuracy measures below bear this out.
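The NA entries need not be hard-coded: stats::AIC and stats::BIC work directly on arima fits through their logLik method (a sketch, assuming M1, M2, and M3 from above; the same comparability caveat applies):
# Information criteria computed from the fitted objects themselves
sapply(list(M1 = M1, M2 = M2, M3 = M3), AIC)
sapply(list(M1 = M1, M2 = M2, M3 = M3), BIC)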
inverse_diff <- function(forecast, lag, original_data) {
# cumulate the differenced forecasts and shift by the last observed value;
# note: this undoes only one non-seasonal difference (lag is unused)
forecast <- cumsum(forecast)
last_value <- tail(original_data, 1)
forecast <- forecast + rep(last_value, length(forecast))
return(forecast)
}
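Because inverse_diff only undoes the non-seasonal difference, the lag-12 seasonal difference is never inverted, which is why M1's forecasts below show no seasonal pattern. A fuller inversion could look like this (a sketch; invert_both is a hypothetical helper, and fc stands for forecasts of the twice-differenced series):
# Sketch: fully invert diff(diff(x, lag = 12)) using stats::diffinv
invert_both <- function(fc, original) {
# undo the first difference, seeded with the last seasonal difference
seas_diff <- diff(original, lag = 12)
step1 <- diffinv(fc, lag = 1, xi = as.numeric(tail(seas_diff, 1)))[-1]
# undo the seasonal difference, seeded with the last 12 original observations
diffinv(step1, lag = 12, xi = as.numeric(tail(original, 12)))[-(1:12)]
}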
Forecast using M1
forecast_M1 <- forecast(M1, h = length(test_data))
Back-transform if necessary (since M1 was differenced twice)
forecast_M1 <- inverse_diff(forecast_M1$mean, lag =2, original_data = train_data)
Forecast using M2
forecast_M2 <- forecast(M2, h = length(test_data))
Back-transform if necessary (since M2 involved Cube root transformation)
forecast_M2$mean <- forecast_M2$mean^3
Forecast using M3
forecast_M3 <- forecast(M3, h = length(test_data))
Back-transform if necessary (since M3 involved Box-Cox transformation)
# NOTE: this applies the forward BoxCox() again; the inverse is forecast::InvBoxCox(), hence the negative M3 forecasts below
forecast_M3$mean <- forecast::BoxCox(forecast_M3$mean, lambda = optimal_lambda)
Forecast using M4
forecast_M4 <- forecast(M4, h = length(test_data))
Print the forecasts using M1
print("Forecasts using M1:")
## [1] "Forecasts using M1:"
print(as.numeric(forecast_M1))
## [1] 354.8346 355.4525 355.7872 356.2244 356.6245 357.0380 357.4466 357.8570
## [9] 358.2668 358.6768 359.0867 359.4967 359.9066 360.3166 360.7265 361.1365
## [17] 361.5464 361.9563 362.3663 362.7762 363.1862 363.5961 364.0061 364.4160
## [25] 364.8259 365.2359 365.6458 366.0558 366.4657 366.8757 367.2856 367.6955
## [33] 368.1055 368.5154 368.9254 369.3353 369.7452 370.1552 370.5651 370.9751
## [41] 371.3850 371.7950 372.2049
Print the forecasts using M2
print("Forecasts using M2:")
## [1] "Forecasts using M2:"
print(as.numeric(forecast_M2$mean))
## [1] 368.4663 357.2203 357.2203 357.2203 357.2203 357.2203 357.2203 357.2203
## [9] 357.2203 357.2203 357.2203 357.2203 357.2203 357.2203 357.2203 357.2203
## [17] 357.2203 357.2203 357.2203 357.2203 357.2203 357.2203 357.2203 357.2203
## [25] 357.2203 357.2203 357.2203 357.2203 357.2203 357.2203 357.2203 357.2203
## [33] 357.2203 357.2203 357.2203 357.2203 357.2203 357.2203 357.2203 357.2203
## [41] 357.2203 357.2203 357.2203
Print the forecasts using M3
print("Forecasts using M3:")
## [1] "Forecasts using M3:"
print(as.numeric(forecast_M3$mean))
## [1] -11.57729 NA -24.32141 -24.32141 -24.32141 -24.32141 -24.32141
## [8] -24.32141 -24.32141 -24.32141 -24.32141 -24.32141 -24.32141 -24.32141
## [15] -24.32141 -24.32141 -24.32141 -24.32141 -24.32141 -24.32141 -24.32141
## [22] -24.32141 -24.32141 -24.32141 -24.32141 -24.32141 -24.32141 -24.32141
## [29] -24.32141 -24.32141 -24.32141 -24.32141 -24.32141 -24.32141 -24.32141
## [36] -24.32141 -24.32141 -24.32141 -24.32141 -24.32141 -24.32141 -24.32141
## [43] -24.32141
Print the forecasts using M4
print("Forecasts using M4:")
## [1] "Forecasts using M4:"
print(as.numeric(forecast_M4$mean))
## [1] 408.3382 449.3231 439.6382 392.3946 345.4076 310.0417 346.1408 354.5912
## [9] 341.8735 394.1257 386.8585 393.4921 447.3179 487.9392 478.5630 430.8144
## [17] 383.4587 348.1598 384.0575 392.6086 379.6560 432.4116 425.0101 431.7109
## [25] 485.4473 526.1353 516.7025 469.0464 421.7582 386.4471 422.3817 430.9144
## [33] 418.0048 470.6681 463.2912 469.9797 523.7325 564.4083 554.9858 507.3128
## [41] 460.0123 424.7034 460.6312
Calculate forecasting accuracy measures
accuracy_M1 <- accuracy(forecast_M1, test_data)
Extract the mean forecast from forecast_M2
forecast_M2_mean <- as.numeric(forecast_M2$mean)
Create a new time series object with the correct time index
forecast_M2_ts <- ts(forecast_M2_mean, start = start(test_data), frequency = frequency(test_data))
Now try calculating accuracy again
accuracy_M2 <- accuracy(forecast_M2_ts, test_data)
accuracy_M3 <- accuracy(forecast_M3, test_data)
accuracy_M4 <- accuracy(forecast_M4, test_data)
Print the accuracy measures
print("M1:")
## [1] "M1:"
print(accuracy_M1)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 48.10882 73.50495 60.2496 9.939578 13.68971 0.7631654 1.711141
print("M2:")
## [1] "M2:"
print(accuracy_M2)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 54.22046 79.11202 65.01037 11.37429 14.72838 0.7759831 1.846238
print("M3:")
## [1] "M3:"
print(accuracy_M3)
## ME RMSE MAE MPE MAPE
## Training set 4.409279e-04 0.01305976 0.01105459 NaN Inf
## Test set 4.344513e+02 438.26464976 434.45131545 105.977 105.977
## MASE ACF1 Theil's U
## Training set 1.872978 0.1683811 NA
## Test set 73609.041821 0.7888269 10.95587
print("M4:")
## [1] "M4:"
print(accuracy_M4)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.5432767 9.009773 6.574159 0.1587477 3.003255 0.2266600
## Test set -19.9371155 29.025225 23.783817 -5.0532062 5.911288 0.8200045
## ACF1 Theil's U
## Training set 0.002839551 NA
## Test set 0.563398888 0.7517186
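For a side-by-side comparison, the test-set rows of the accuracy matrices can be stacked (a sketch, assuming the accuracy_* objects above; M3's figures are inflated by its mis-scaled back-transform):
# Stack the test-set RMSE and MAPE of all four models into one table
rbind(
M1 = accuracy_M1["Test set", c("RMSE", "MAPE")],
M2 = accuracy_M2["Test set", c("RMSE", "MAPE")],
M3 = accuracy_M3["Test set", c("RMSE", "MAPE")],
M4 = accuracy_M4["Test set", c("RMSE", "MAPE")]
)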
Forecast 10 points ahead using the best model. M4 has by far the lowest test-set RMSE (29.03) and MAPE (5.91%), so it is used for the final forecast.
forecast_10_points <- forecast(M4, h =10)
print("Forecast for the Next 10 Points:")
## [1] "Forecast for the Next 10 Points:"
print(forecast_10_points$mean)
##           Jan      Feb      Mar      Apr      May      Jun      Jul      Aug      Sep
## 1957                                               408.3382 449.3231 439.6382 392.3946
## 1958 354.5912 341.8735 394.1257
##           Oct      Nov      Dec
## 1957 345.4076 310.0417 346.1408
## 1958
plot(forecast_10_points, main ="Forecast for the Next 10 Points")