Airline Passenger Time Series Forecast
install.packages("rmarkdown",repos = "http://cran.us.r-project.org")
## Installing package into 'C:/Users/tswaminathan/Documents/R/win-library/3.5'
## (as 'lib' is unspecified)
##
## There is a binary version available but the source version is
## later:
## binary source needs_compilation
## rmarkdown 1.10 1.11 FALSE
## installing the source package 'rmarkdown'
install.packages("forecast",repos = "http://cran.us.r-project.org")
## Installing package into 'C:/Users/tswaminathan/Documents/R/win-library/3.5'
## (as 'lib' is unspecified)
## package 'forecast' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\tswaminathan\AppData\Local\Temp\Rtmp2bxOCI\downloaded_packages
install.packages("fpp",repos = "http://cran.us.r-project.org")
## Installing package into 'C:/Users/tswaminathan/Documents/R/win-library/3.5'
## (as 'lib' is unspecified)
## package 'fpp' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\tswaminathan\AppData\Local\Temp\Rtmp2bxOCI\downloaded_packages
install.packages("smooth",repos = "http://cran.us.r-project.org")
## Installing package into 'C:/Users/tswaminathan/Documents/R/win-library/3.5'
## (as 'lib' is unspecified)
## package 'smooth' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\tswaminathan\AppData\Local\Temp\Rtmp2bxOCI\downloaded_packages
install.packages("readxl",repos = "http://cran.us.r-project.org")
## Installing package into 'C:/Users/tswaminathan/Documents/R/win-library/3.5'
## (as 'lib' is unspecified)
## package 'readxl' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\tswaminathan\AppData\Local\Temp\Rtmp2bxOCI\downloaded_packages
library(forecast)
library(fpp)
## Loading required package: fma
## Loading required package: expsmooth
## Loading required package: lmtest
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: tseries
library(smooth)
## Loading required package: greybox
## Package "greybox", v0.3.3 loaded.
## This is package "smooth", v2.4.7
library(readxl)
# Tutorial/Assignments/Forecasting/Airlines Data/")
Airlines<-read_excel(file.choose()) # read the Airlines data
View(Airlines) # Seasonality 12 months
windows()
plot(Airlines$Passengers,type="o")

# So creating 12 dummy variables
X<- data.frame(outer(rep(month.abb,length = 96), month.abb,"==") + 0 )# Creating dummies for 12 months
# View(X)
colnames(X)<-month.abb # Assigning month names
# View(X)
AirlinesData<-cbind(Airlines,X)
View(AirlinesData)
colnames(AirlinesData)
## [1] "Month" "Passengers" "Jan" "Feb" "Mar"
## [6] "Apr" "May" "Jun" "Jul" "Aug"
## [11] "Sep" "Oct" "Nov" "Dec"
AirlinesData["t"]<- 1:96
View(AirlinesData)
AirlinesData["log_Passenger"]<-log(AirlinesData["Passengers"])
AirlinesData["t_square"]<-AirlinesData["t"]*AirlinesData["t"]
attach(AirlinesData)
train<-AirlinesData[1:84,]
test<-AirlinesData[85:96,]
########################### LINEAR MODEL #############################
linear_model<-lm(Passengers~t,data=train)
summary(linear_model)
##
## Call:
## lm(formula = Passengers ~ t, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -55.419 -17.202 -0.705 16.546 88.438
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 106.2708 5.9287 17.93 <2e-16 ***
## t 2.1429 0.1212 17.69 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 26.93 on 82 degrees of freedom
## Multiple R-squared: 0.7923, Adjusted R-squared: 0.7898
## F-statistic: 312.8 on 1 and 82 DF, p-value: < 2.2e-16
linear_pred<-data.frame(predict(linear_model,interval='predict',newdata =test))
View(linear_pred)
rmse_linear<-sqrt(mean((test$Passengers-linear_pred$fit)^2,na.rm = T))
rmse_linear # 53.19924
## [1] 53.19924
######################### Exponential #################################
expo_model<-lm(log_Passenger~t,data=train)
summary(expo_model)
##
## Call:
## lm(formula = log_Passenger ~ t, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.28906 -0.07775 -0.01528 0.07901 0.25104
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.770262 0.027693 172.26 <2e-16 ***
## t 0.011087 0.000566 19.59 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1258 on 82 degrees of freedom
## Multiple R-squared: 0.8239, Adjusted R-squared: 0.8218
## F-statistic: 383.7 on 1 and 82 DF, p-value: < 2.2e-16
expo_pred<-data.frame(predict(expo_model,interval='predict',newdata=test))
rmse_expo<-sqrt(mean((test$Passengers-exp(expo_pred$fit))^2,na.rm = T))
rmse_expo # 46.05736 and Adjusted R2 - 82.18 %
## [1] 46.05736
######################### Quadratic ####################################
Quad_model<-lm(Passengers~t+t_square,data=train)
summary(Quad_model)
##
## Call:
## lm(formula = Passengers ~ t + t_square, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -56.985 -15.652 -3.801 16.360 83.241
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.148e+02 8.996e+00 12.758 < 2e-16 ***
## t 1.549e+00 4.885e-01 3.172 0.00214 **
## t_square 6.982e-03 5.569e-03 1.254 0.21350
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 26.83 on 81 degrees of freedom
## Multiple R-squared: 0.7963, Adjusted R-squared: 0.7912
## F-statistic: 158.3 on 2 and 81 DF, p-value: < 2.2e-16
Quad_pred<-data.frame(predict(Quad_model,interval='predict',newdata=test))
rmse_Quad<-sqrt(mean((test$Passengers-Quad_pred$fit)^2,na.rm=T))
rmse_Quad # 48.05189 and Adjusted R2 - 79.12%
## [1] 48.05189
######################### Additive Seasonality #########################
sea_add_model<-lm(Passengers~Jan+Feb+Mar+Apr+May+Jun+Jul+Aug+Sep+Oct+Nov,data=train)
summary(sea_add_model)
##
## Call:
## lm(formula = Passengers ~ Jan + Feb + Mar + Apr + May + Jun +
## Jul + Aug + Sep + Oct + Nov, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -91.571 -51.393 2.143 38.464 124.429
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 189.429 21.746 8.711 7.21e-13 ***
## Jan -20.143 30.753 -0.655 0.515
## Feb -19.286 30.753 -0.627 0.533
## Mar 8.000 30.753 0.260 0.796
## Apr 1.857 30.753 0.060 0.952
## May 1.143 30.753 0.037 0.970
## Jun 25.143 30.753 0.818 0.416
## Jul 50.143 30.753 1.630 0.107
## Aug 49.286 30.753 1.603 0.113
## Sep 24.143 30.753 0.785 0.435
## Oct -1.000 30.753 -0.033 0.974
## Nov -24.286 30.753 -0.790 0.432
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 57.53 on 72 degrees of freedom
## Multiple R-squared: 0.1674, Adjusted R-squared: 0.04015
## F-statistic: 1.316 on 11 and 72 DF, p-value: 0.2337
sea_add_pred<-data.frame(predict(sea_add_model,newdata=test,interval='predict'))
rmse_sea_add<-sqrt(mean((test$Passengers-sea_add_pred$fit)^2,na.rm = T))
rmse_sea_add # 132.8198
## [1] 132.8198
######################## Additive Seasonality with Linear #################
Add_sea_Linear_model<-lm(Passengers~t+Jan+Feb+Mar+Apr+May+Jun+Jul+Aug+Sep+Oct+Nov,data=train)
summary(Add_sea_Linear_model)
##
## Call:
## lm(formula = Passengers ~ t + Jan + Feb + Mar + Apr + May + Jun +
## Jul + Aug + Sep + Oct + Nov, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33.952 -8.679 -0.286 6.976 46.714
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 85.80952 5.87255 14.612 < 2e-16 ***
## t 2.15873 0.06117 35.289 < 2e-16 ***
## Jan 3.60317 7.22378 0.499 0.61947
## Feb 2.30159 7.21834 0.319 0.75077
## Mar 27.42857 7.21341 3.802 0.00030 ***
## Apr 19.12698 7.20900 2.653 0.00983 **
## May 16.25397 7.20511 2.256 0.02716 *
## Jun 38.09524 7.20173 5.290 1.30e-06 ***
## Jul 60.93651 7.19887 8.465 2.30e-12 ***
## Aug 57.92063 7.19653 8.048 1.36e-11 ***
## Sep 30.61905 7.19471 4.256 6.26e-05 ***
## Oct 3.31746 7.19341 0.461 0.64608
## Nov -22.12698 7.19263 -3.076 0.00298 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.46 on 71 degrees of freedom
## Multiple R-squared: 0.9551, Adjusted R-squared: 0.9475
## F-statistic: 125.8 on 12 and 71 DF, p-value: < 2.2e-16
Add_sea_Linear_pred<-data.frame(predict(Add_sea_Linear_model,interval='predict',newdata=test))
rmse_Add_sea_Linear<-sqrt(mean((test$Passengers-Add_sea_Linear_pred$fit)^2,na.rm=T))
rmse_Add_sea_Linear # 35.34896 and Adjusted R2 - 94.75%
## [1] 35.34896
######################## Additive Seasonality with Quadratic #################
Add_sea_Quad_model<-lm(Passengers~t+t_square+Jan+Feb+Mar+Apr+May+Jun+Jul+Aug+Sep+Oct+Nov,data=train)
summary(Add_sea_Quad_model)
##
## Call:
## lm(formula = Passengers ~ t + t_square + Jan + Feb + Mar + Apr +
## May + Jun + Jul + Aug + Sep + Oct + Nov, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -32.297 -7.694 0.392 7.735 40.922
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 95.003657 6.438877 14.755 < 2e-16 ***
## t 1.507479 0.233462 6.457 1.20e-08 ***
## t_square 0.007662 0.002660 2.881 0.005263 **
## Jan 3.603175 6.878882 0.524 0.602071
## Feb 2.378205 6.873752 0.346 0.730393
## Mar 27.566483 6.869176 4.013 0.000148 ***
## Apr 19.310867 6.865106 2.813 0.006367 **
## May 16.468498 6.861505 2.400 0.019052 *
## Jun 38.325091 6.858349 5.588 4.11e-07 ***
## Jul 61.166361 6.855628 8.922 3.67e-13 ***
## Aug 58.135165 6.853340 8.483 2.36e-12 ***
## Sep 30.802930 6.851500 4.496 2.68e-05 ***
## Oct 3.455372 6.850131 0.504 0.615547
## Nov -22.050366 6.849272 -3.219 0.001949 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.81 on 70 degrees of freedom
## Multiple R-squared: 0.9598, Adjusted R-squared: 0.9524
## F-statistic: 128.7 on 13 and 70 DF, p-value: < 2.2e-16
Add_sea_Quad_pred<-data.frame(predict(Add_sea_Quad_model,interval='predict',newdata=test))
rmse_Add_sea_Quad<-sqrt(mean((test$Passengers-Add_sea_Quad_pred$fit)^2,na.rm=T))
rmse_Add_sea_Quad # 26.36082 and Adjusted R2 - 95.24%
## [1] 26.36082
######################## Multiplicative Seasonality #########################
multi_sea_model<-lm(log_Passenger~Jan+Feb+Mar+Apr+May+Jun+Jul+Aug+Sep+Oct+Nov,data = train)
summary(multi_sea_model)
##
## Call:
## lm(formula = log_Passenger ~ Jan + Feb + Mar + Apr + May + Jun +
## Jul + Aug + Sep + Oct + Nov, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.43858 -0.28085 0.04875 0.22435 0.46174
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.208117 0.111153 46.856 <2e-16 ***
## Jan -0.112831 0.157194 -0.718 0.475
## Feb -0.097238 0.157194 -0.619 0.538
## Mar 0.047126 0.157194 0.300 0.765
## Apr 0.011394 0.157194 0.072 0.942
## May 0.001676 0.157194 0.011 0.992
## Jun 0.120037 0.157194 0.764 0.448
## Jul 0.227301 0.157194 1.446 0.153
## Aug 0.227676 0.157194 1.448 0.152
## Sep 0.120513 0.157194 0.767 0.446
## Oct -0.006967 0.157194 -0.044 0.965
## Nov -0.138701 0.157194 -0.882 0.381
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2941 on 72 degrees of freedom
## Multiple R-squared: 0.1548, Adjusted R-squared: 0.02568
## F-statistic: 1.199 on 11 and 72 DF, p-value: 0.3036
multi_sea_pred<-data.frame(predict(multi_sea_model,newdata=test,interval='predict'))
rmse_multi_sea<-sqrt(mean((test$Passengers-exp(multi_sea_pred$fit))^2,na.rm = T))
rmse_multi_sea # 140.0632
## [1] 140.0632
######################## Multiplicative Seasonality Linear trend ##########################
multi_add_sea_model<-lm(log_Passenger~t+Jan+Feb+Mar+Apr+May+Jun+Jul+Aug+Sep+Oct+Nov,data = train)
summary(multi_add_sea_model)
##
## Call:
## lm(formula = log_Passenger ~ t + Jan + Feb + Mar + Apr + May +
## Jun + Jul + Aug + Sep + Oct + Nov, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.142864 -0.031286 0.000823 0.031275 0.105860
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.6712626 0.0216310 215.952 < 2e-16 ***
## t 0.0111845 0.0002253 49.637 < 2e-16 ***
## Jan 0.0101977 0.0266082 0.383 0.702676
## Feb 0.0146065 0.0265881 0.549 0.584480
## Mar 0.1477860 0.0265700 5.562 4.41e-07 ***
## Apr 0.1008701 0.0265537 3.799 0.000304 ***
## May 0.0799669 0.0265394 3.013 0.003582 **
## Jun 0.1871442 0.0265270 7.055 9.31e-10 ***
## Jul 0.2832229 0.0265164 10.681 < 2e-16 ***
## Aug 0.2724138 0.0265078 10.277 1.08e-15 ***
## Sep 0.1540663 0.0265011 5.814 1.61e-07 ***
## Oct 0.0154017 0.0264963 0.581 0.562893
## Nov -0.1275166 0.0264934 -4.813 8.11e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04956 on 71 degrees of freedom
## Multiple R-squared: 0.9763, Adjusted R-squared: 0.9723
## F-statistic: 244 on 12 and 71 DF, p-value: < 2.2e-16
multi_add_sea_pred<-data.frame(predict(multi_add_sea_model,newdata=test,interval='predict'))
rmse_multi_add_sea<-sqrt(mean((test$Passengers-exp(multi_add_sea_pred$fit))^2,na.rm = T))
rmse_multi_add_sea # 10.51917 and Adjusted R2 - 97.23%
## [1] 10.51917
# Preparing table on model and it's RMSE values
table_rmse<-data.frame(c("rmse_linear","rmse_expo","rmse_Quad","rmse_sea_add","rmse_Add_sea_Quad","rmse_multi_sea","rmse_multi_add_sea"),c(rmse_linear,rmse_expo,rmse_Quad,rmse_sea_add,rmse_Add_sea_Quad,rmse_multi_sea,rmse_multi_add_sea))
View(table_rmse)
colnames(table_rmse)<-c("model","RMSE")
View(table_rmse)
# Multiplicative Seasonality Linear trend has least RMSE value
new_model<-lm(log_Passenger~t+Jan+Feb+Mar+Apr+May+Jun+Jul+Aug+Sep+Oct+Nov,data = AirlinesData)
new_model_pred<-data.frame(predict(new_model,newdata=AirlinesData,interval='predict'))
new_model_fin <- exp(new_model$fitted.values)
View(new_model_fin)
pred_res<- predict(arima(log_Passenger,order=c(1,0,0)),n.ahead = 12)
Month <- as.data.frame(Airlines$Month)
Final <- as.data.frame(cbind(Month,AirlinesData$Passengers,new_model_fin))
colnames(Final) <-c("Month","Passengers","New_Pred_Value")
Final <- as.data.frame(Final)
View(Final)
# plot(Final$new_model_fin,type="o")