Airline Passenger Time Series Forecast

install.packages("rmarkdown",repos = "http://cran.us.r-project.org")
## Installing package into 'C:/Users/tswaminathan/Documents/R/win-library/3.5'
## (as 'lib' is unspecified)
## 
##   There is a binary version available but the source version is
##   later:
##           binary source needs_compilation
## rmarkdown   1.10   1.11             FALSE
## installing the source package 'rmarkdown'
install.packages("forecast",repos = "http://cran.us.r-project.org")
## Installing package into 'C:/Users/tswaminathan/Documents/R/win-library/3.5'
## (as 'lib' is unspecified)
## package 'forecast' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\tswaminathan\AppData\Local\Temp\Rtmp2bxOCI\downloaded_packages
install.packages("fpp",repos = "http://cran.us.r-project.org")
## Installing package into 'C:/Users/tswaminathan/Documents/R/win-library/3.5'
## (as 'lib' is unspecified)
## package 'fpp' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\tswaminathan\AppData\Local\Temp\Rtmp2bxOCI\downloaded_packages
install.packages("smooth",repos = "http://cran.us.r-project.org")
## Installing package into 'C:/Users/tswaminathan/Documents/R/win-library/3.5'
## (as 'lib' is unspecified)
## package 'smooth' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\tswaminathan\AppData\Local\Temp\Rtmp2bxOCI\downloaded_packages
install.packages("readxl",repos = "http://cran.us.r-project.org")
## Installing package into 'C:/Users/tswaminathan/Documents/R/win-library/3.5'
## (as 'lib' is unspecified)
## package 'readxl' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\tswaminathan\AppData\Local\Temp\Rtmp2bxOCI\downloaded_packages
library(forecast)
library(fpp)
## Loading required package: fma
## Loading required package: expsmooth
## Loading required package: lmtest
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: tseries
library(smooth)
## Loading required package: greybox
## Package "greybox", v0.3.3 loaded.
## This is package "smooth", v2.4.7
library(readxl)
# Tutorial/Assignments/Forecasting/Airlines Data/")
Airlines<-read_excel(file.choose()) # read the Airlines data
View(Airlines) # Seasonality 12 months 
windows()
plot(Airlines$Passengers,type="o")

# So creating 12 dummy variables 

X<- data.frame(outer(rep(month.abb,length = 96), month.abb,"==") + 0 )# Creating dummies for 12 months
# View(X)

colnames(X)<-month.abb # Assigning month names 
# View(X)
AirlinesData<-cbind(Airlines,X)
View(AirlinesData)
colnames(AirlinesData)
##  [1] "Month"      "Passengers" "Jan"        "Feb"        "Mar"       
##  [6] "Apr"        "May"        "Jun"        "Jul"        "Aug"       
## [11] "Sep"        "Oct"        "Nov"        "Dec"
AirlinesData["t"]<- 1:96
View(AirlinesData)
AirlinesData["log_Passenger"]<-log(AirlinesData["Passengers"])
AirlinesData["t_square"]<-AirlinesData["t"]*AirlinesData["t"]
attach(AirlinesData)

train<-AirlinesData[1:84,]

test<-AirlinesData[85:96,]

########################### LINEAR MODEL #############################

linear_model<-lm(Passengers~t,data=train)
summary(linear_model)
## 
## Call:
## lm(formula = Passengers ~ t, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -55.419 -17.202  -0.705  16.546  88.438 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 106.2708     5.9287   17.93   <2e-16 ***
## t             2.1429     0.1212   17.69   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 26.93 on 82 degrees of freedom
## Multiple R-squared:  0.7923, Adjusted R-squared:  0.7898 
## F-statistic: 312.8 on 1 and 82 DF,  p-value: < 2.2e-16
linear_pred<-data.frame(predict(linear_model,interval='predict',newdata =test))
View(linear_pred)
rmse_linear<-sqrt(mean((test$Passengers-linear_pred$fit)^2,na.rm = T))
rmse_linear # 53.19924
## [1] 53.19924
######################### Exponential #################################

expo_model<-lm(log_Passenger~t,data=train)
summary(expo_model)
## 
## Call:
## lm(formula = log_Passenger ~ t, data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.28906 -0.07775 -0.01528  0.07901  0.25104 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 4.770262   0.027693  172.26   <2e-16 ***
## t           0.011087   0.000566   19.59   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1258 on 82 degrees of freedom
## Multiple R-squared:  0.8239, Adjusted R-squared:  0.8218 
## F-statistic: 383.7 on 1 and 82 DF,  p-value: < 2.2e-16
expo_pred<-data.frame(predict(expo_model,interval='predict',newdata=test))
rmse_expo<-sqrt(mean((test$Passengers-exp(expo_pred$fit))^2,na.rm = T))
rmse_expo # 46.05736  and Adjusted R2 - 82.18 %
## [1] 46.05736
######################### Quadratic ####################################

Quad_model<-lm(Passengers~t+t_square,data=train)
summary(Quad_model)
## 
## Call:
## lm(formula = Passengers ~ t + t_square, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -56.985 -15.652  -3.801  16.360  83.241 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.148e+02  8.996e+00  12.758  < 2e-16 ***
## t           1.549e+00  4.885e-01   3.172  0.00214 ** 
## t_square    6.982e-03  5.569e-03   1.254  0.21350    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 26.83 on 81 degrees of freedom
## Multiple R-squared:  0.7963, Adjusted R-squared:  0.7912 
## F-statistic: 158.3 on 2 and 81 DF,  p-value: < 2.2e-16
Quad_pred<-data.frame(predict(Quad_model,interval='predict',newdata=test))
rmse_Quad<-sqrt(mean((test$Passengers-Quad_pred$fit)^2,na.rm=T))
rmse_Quad # 48.05189 and Adjusted R2 - 79.12%
## [1] 48.05189
######################### Additive Seasonality #########################

sea_add_model<-lm(Passengers~Jan+Feb+Mar+Apr+May+Jun+Jul+Aug+Sep+Oct+Nov,data=train)
summary(sea_add_model)
## 
## Call:
## lm(formula = Passengers ~ Jan + Feb + Mar + Apr + May + Jun + 
##     Jul + Aug + Sep + Oct + Nov, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -91.571 -51.393   2.143  38.464 124.429 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  189.429     21.746   8.711 7.21e-13 ***
## Jan          -20.143     30.753  -0.655    0.515    
## Feb          -19.286     30.753  -0.627    0.533    
## Mar            8.000     30.753   0.260    0.796    
## Apr            1.857     30.753   0.060    0.952    
## May            1.143     30.753   0.037    0.970    
## Jun           25.143     30.753   0.818    0.416    
## Jul           50.143     30.753   1.630    0.107    
## Aug           49.286     30.753   1.603    0.113    
## Sep           24.143     30.753   0.785    0.435    
## Oct           -1.000     30.753  -0.033    0.974    
## Nov          -24.286     30.753  -0.790    0.432    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 57.53 on 72 degrees of freedom
## Multiple R-squared:  0.1674, Adjusted R-squared:  0.04015 
## F-statistic: 1.316 on 11 and 72 DF,  p-value: 0.2337
sea_add_pred<-data.frame(predict(sea_add_model,newdata=test,interval='predict'))
rmse_sea_add<-sqrt(mean((test$Passengers-sea_add_pred$fit)^2,na.rm = T))
rmse_sea_add # 132.8198
## [1] 132.8198
######################## Additive Seasonality with Linear #################

Add_sea_Linear_model<-lm(Passengers~t+Jan+Feb+Mar+Apr+May+Jun+Jul+Aug+Sep+Oct+Nov,data=train)
summary(Add_sea_Linear_model)
## 
## Call:
## lm(formula = Passengers ~ t + Jan + Feb + Mar + Apr + May + Jun + 
##     Jul + Aug + Sep + Oct + Nov, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -33.952  -8.679  -0.286   6.976  46.714 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  85.80952    5.87255  14.612  < 2e-16 ***
## t             2.15873    0.06117  35.289  < 2e-16 ***
## Jan           3.60317    7.22378   0.499  0.61947    
## Feb           2.30159    7.21834   0.319  0.75077    
## Mar          27.42857    7.21341   3.802  0.00030 ***
## Apr          19.12698    7.20900   2.653  0.00983 ** 
## May          16.25397    7.20511   2.256  0.02716 *  
## Jun          38.09524    7.20173   5.290 1.30e-06 ***
## Jul          60.93651    7.19887   8.465 2.30e-12 ***
## Aug          57.92063    7.19653   8.048 1.36e-11 ***
## Sep          30.61905    7.19471   4.256 6.26e-05 ***
## Oct           3.31746    7.19341   0.461  0.64608    
## Nov         -22.12698    7.19263  -3.076  0.00298 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.46 on 71 degrees of freedom
## Multiple R-squared:  0.9551, Adjusted R-squared:  0.9475 
## F-statistic: 125.8 on 12 and 71 DF,  p-value: < 2.2e-16
Add_sea_Linear_pred<-data.frame(predict(Add_sea_Linear_model,interval='predict',newdata=test))
rmse_Add_sea_Linear<-sqrt(mean((test$Passengers-Add_sea_Linear_pred$fit)^2,na.rm=T))
rmse_Add_sea_Linear # 35.34896 and Adjusted R2 - 94.75%
## [1] 35.34896
######################## Additive Seasonality with Quadratic #################

Add_sea_Quad_model<-lm(Passengers~t+t_square+Jan+Feb+Mar+Apr+May+Jun+Jul+Aug+Sep+Oct+Nov,data=train)
summary(Add_sea_Quad_model)
## 
## Call:
## lm(formula = Passengers ~ t + t_square + Jan + Feb + Mar + Apr + 
##     May + Jun + Jul + Aug + Sep + Oct + Nov, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -32.297  -7.694   0.392   7.735  40.922 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  95.003657   6.438877  14.755  < 2e-16 ***
## t             1.507479   0.233462   6.457 1.20e-08 ***
## t_square      0.007662   0.002660   2.881 0.005263 ** 
## Jan           3.603175   6.878882   0.524 0.602071    
## Feb           2.378205   6.873752   0.346 0.730393    
## Mar          27.566483   6.869176   4.013 0.000148 ***
## Apr          19.310867   6.865106   2.813 0.006367 ** 
## May          16.468498   6.861505   2.400 0.019052 *  
## Jun          38.325091   6.858349   5.588 4.11e-07 ***
## Jul          61.166361   6.855628   8.922 3.67e-13 ***
## Aug          58.135165   6.853340   8.483 2.36e-12 ***
## Sep          30.802930   6.851500   4.496 2.68e-05 ***
## Oct           3.455372   6.850131   0.504 0.615547    
## Nov         -22.050366   6.849272  -3.219 0.001949 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.81 on 70 degrees of freedom
## Multiple R-squared:  0.9598, Adjusted R-squared:  0.9524 
## F-statistic: 128.7 on 13 and 70 DF,  p-value: < 2.2e-16
Add_sea_Quad_pred<-data.frame(predict(Add_sea_Quad_model,interval='predict',newdata=test))
rmse_Add_sea_Quad<-sqrt(mean((test$Passengers-Add_sea_Quad_pred$fit)^2,na.rm=T))
rmse_Add_sea_Quad # 26.36082 and Adjusted R2 - 95.24%
## [1] 26.36082
######################## Multiplicative Seasonality #########################

multi_sea_model<-lm(log_Passenger~Jan+Feb+Mar+Apr+May+Jun+Jul+Aug+Sep+Oct+Nov,data = train)
summary(multi_sea_model)
## 
## Call:
## lm(formula = log_Passenger ~ Jan + Feb + Mar + Apr + May + Jun + 
##     Jul + Aug + Sep + Oct + Nov, data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.43858 -0.28085  0.04875  0.22435  0.46174 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.208117   0.111153  46.856   <2e-16 ***
## Jan         -0.112831   0.157194  -0.718    0.475    
## Feb         -0.097238   0.157194  -0.619    0.538    
## Mar          0.047126   0.157194   0.300    0.765    
## Apr          0.011394   0.157194   0.072    0.942    
## May          0.001676   0.157194   0.011    0.992    
## Jun          0.120037   0.157194   0.764    0.448    
## Jul          0.227301   0.157194   1.446    0.153    
## Aug          0.227676   0.157194   1.448    0.152    
## Sep          0.120513   0.157194   0.767    0.446    
## Oct         -0.006967   0.157194  -0.044    0.965    
## Nov         -0.138701   0.157194  -0.882    0.381    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2941 on 72 degrees of freedom
## Multiple R-squared:  0.1548, Adjusted R-squared:  0.02568 
## F-statistic: 1.199 on 11 and 72 DF,  p-value: 0.3036
multi_sea_pred<-data.frame(predict(multi_sea_model,newdata=test,interval='predict'))
rmse_multi_sea<-sqrt(mean((test$Passengers-exp(multi_sea_pred$fit))^2,na.rm = T))
rmse_multi_sea # 140.0632
## [1] 140.0632
######################## Multiplicative Seasonality Linear trend ##########################

multi_add_sea_model<-lm(log_Passenger~t+Jan+Feb+Mar+Apr+May+Jun+Jul+Aug+Sep+Oct+Nov,data = train)
summary(multi_add_sea_model) 
## 
## Call:
## lm(formula = log_Passenger ~ t + Jan + Feb + Mar + Apr + May + 
##     Jun + Jul + Aug + Sep + Oct + Nov, data = train)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.142864 -0.031286  0.000823  0.031275  0.105860 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.6712626  0.0216310 215.952  < 2e-16 ***
## t            0.0111845  0.0002253  49.637  < 2e-16 ***
## Jan          0.0101977  0.0266082   0.383 0.702676    
## Feb          0.0146065  0.0265881   0.549 0.584480    
## Mar          0.1477860  0.0265700   5.562 4.41e-07 ***
## Apr          0.1008701  0.0265537   3.799 0.000304 ***
## May          0.0799669  0.0265394   3.013 0.003582 ** 
## Jun          0.1871442  0.0265270   7.055 9.31e-10 ***
## Jul          0.2832229  0.0265164  10.681  < 2e-16 ***
## Aug          0.2724138  0.0265078  10.277 1.08e-15 ***
## Sep          0.1540663  0.0265011   5.814 1.61e-07 ***
## Oct          0.0154017  0.0264963   0.581 0.562893    
## Nov         -0.1275166  0.0264934  -4.813 8.11e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04956 on 71 degrees of freedom
## Multiple R-squared:  0.9763, Adjusted R-squared:  0.9723 
## F-statistic:   244 on 12 and 71 DF,  p-value: < 2.2e-16
multi_add_sea_pred<-data.frame(predict(multi_add_sea_model,newdata=test,interval='predict'))
rmse_multi_add_sea<-sqrt(mean((test$Passengers-exp(multi_add_sea_pred$fit))^2,na.rm = T))
rmse_multi_add_sea # 10.51917 and Adjusted R2 - 97.23%
## [1] 10.51917
# Preparing table on model and it's RMSE values 

table_rmse<-data.frame(c("rmse_linear","rmse_expo","rmse_Quad","rmse_sea_add","rmse_Add_sea_Quad","rmse_multi_sea","rmse_multi_add_sea"),c(rmse_linear,rmse_expo,rmse_Quad,rmse_sea_add,rmse_Add_sea_Quad,rmse_multi_sea,rmse_multi_add_sea))
View(table_rmse)
colnames(table_rmse)<-c("model","RMSE")
View(table_rmse)

# Multiplicative Seasonality Linear trend  has least RMSE value

new_model<-lm(log_Passenger~t+Jan+Feb+Mar+Apr+May+Jun+Jul+Aug+Sep+Oct+Nov,data = AirlinesData)
new_model_pred<-data.frame(predict(new_model,newdata=AirlinesData,interval='predict'))
new_model_fin <- exp(new_model$fitted.values)

View(new_model_fin)

pred_res<- predict(arima(log_Passenger,order=c(1,0,0)),n.ahead = 12)
Month <- as.data.frame(Airlines$Month)

Final <- as.data.frame(cbind(Month,AirlinesData$Passengers,new_model_fin))
colnames(Final) <-c("Month","Passengers","New_Pred_Value")
Final <- as.data.frame(Final)
View(Final)

# plot(Final$new_model_fin,type="o")