Cocacola Sales Time Series Forecast
# install.packages("rmarkdown",repos = "http://cran.us.r-project.org")
# install.packages("forecast",repos = "http://cran.us.r-project.org")
# install.packages("fpp",repos = "http://cran.us.r-project.org")
# install.packages("smooth",repos = "http://cran.us.r-project.org")
# install.packages("readxl",repos = "http://cran.us.r-project.org")
library(forecast)
## Warning: package 'forecast' was built under R version 3.5.1
library(fpp)
## Warning: package 'fpp' was built under R version 3.5.1
## Loading required package: fma
## Warning: package 'fma' was built under R version 3.5.1
## Loading required package: expsmooth
## Warning: package 'expsmooth' was built under R version 3.5.1
## Loading required package: lmtest
## Warning: package 'lmtest' was built under R version 3.5.1
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.5.1
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: tseries
## Warning: package 'tseries' was built under R version 3.5.1
library(smooth)
## Warning: package 'smooth' was built under R version 3.5.1
## Loading required package: greybox
## Warning: package 'greybox' was built under R version 3.5.1
## Package "greybox", v0.3.3 loaded.
## This is package "smooth", v2.4.7
library(readxl)
## Warning: package 'readxl' was built under R version 3.5.1
# setwd("C:/Desktop Docs_2/Desktop_Docs_New/NewDesktop_Docs_02092018/R and Python Tutorial/Assignments/Forecasting/Airlines Data/")
Cocacola <- read_excel(file.choose()) # read the Airlines data
View(Cocacola) # Quarterly 4 months
windows()
plot(Cocacola$Sales,type="o")

Q1 <- ifelse(grepl("Q1",Cocacola$Quarter),'1','0')
Q2 <- ifelse(grepl("Q2",Cocacola$Quarter),'1','0')
Q3 <- ifelse(grepl("Q3",Cocacola$Quarter),'1','0')
Q4 <- ifelse(grepl("Q4",Cocacola$Quarter),'1','0')
# So creating 12 dummy variables
CocacolaData<-cbind(Cocacola,Q1,Q2,Q3,Q4)
View(CocacolaData)
colnames(CocacolaData)
## [1] "Quarter" "Sales" "Q1" "Q2" "Q3" "Q4"
CocacolaData["t"]<- 1:42
View(CocacolaData)
CocacolaData["log_Sales"]<-log(CocacolaData["Sales"])
CocacolaData["t_square"]<-CocacolaData["t"]*CocacolaData["t"]
attach(CocacolaData)
## The following objects are masked _by_ .GlobalEnv:
##
## Q1, Q2, Q3, Q4
train<-CocacolaData[1:36,]
test<-CocacolaData[37:40,]
########################### LINEAR MODEL #############################
linear_model<-lm(Sales~t,data=train)
summary(linear_model)
##
## Call:
## lm(formula = Sales ~ t, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -485.47 -287.86 -17.94 163.36 803.06
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1537.244 118.024 13.03 9.01e-15 ***
## t 64.500 5.563 11.60 2.31e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 346.7 on 34 degrees of freedom
## Multiple R-squared: 0.7982, Adjusted R-squared: 0.7922
## F-statistic: 134.4 on 1 and 34 DF, p-value: 2.313e-13
linear_pred<-data.frame(predict(linear_model,interval='predict',newdata =test))
View(linear_pred)
rmse_linear<-sqrt(mean((test$Sales-linear_pred$fit)^2,na.rm = T))
rmse_linear # 644.018 and Adjusted R2 Vaue - 79.22%
## [1] 644.0188
######################### Exponential #################################
expo_model<-lm(log_Sales~t,data=train)
summary(expo_model)
##
## Call:
## lm(formula = log_Sales ~ t, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.21775 -0.09522 -0.01134 0.07156 0.32157
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.446255 0.041266 180.44 < 2e-16 ***
## t 0.023219 0.001945 11.94 1.04e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1212 on 34 degrees of freedom
## Multiple R-squared: 0.8074, Adjusted R-squared: 0.8017
## F-statistic: 142.5 on 1 and 34 DF, p-value: 1.038e-13
expo_pred<-data.frame(predict(expo_model,interval='predict',newdata=test))
rmse_expo<-sqrt(mean((test$Sales-exp(expo_pred$fit))^2,na.rm = T))
rmse_expo # 524.7351 and Adjusted R2 - 80.17 %
## [1] 524.7351
######################### Quadratic ####################################
Quad_model<-lm(Sales~t+t_square,data=train)
summary(Quad_model)
##
## Call:
## lm(formula = Sales ~ t + t_square, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -523.55 -220.14 -35.64 253.51 531.75
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2017.6741 150.8093 13.379 7e-15 ***
## t -11.3573 18.7949 -0.604 0.549795
## t_square 2.0502 0.4927 4.161 0.000213 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 285 on 33 degrees of freedom
## Multiple R-squared: 0.8676, Adjusted R-squared: 0.8596
## F-statistic: 108.1 on 2 and 33 DF, p-value: 3.238e-15
Quad_pred<-data.frame(predict(Quad_model,interval='predict',newdata=test))
rmse_Quad<-sqrt(mean((test$Sales-Quad_pred$fit)^2,na.rm=T))
rmse_Quad # 434.7185 and Adjusted R2 - 85.96 %
## [1] 434.7185
######################### Additive Seasonality #########################
sea_add_model<-lm(Sales~Q1+Q2+Q3+Q4,data=train)
summary(sea_add_model)
##
## Call:
## lm(formula = Sales ~ Q1 + Q2 + Q3 + Q4, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -924.1 -624.8 -163.8 576.8 1522.6
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2712.9 249.3 10.884 2.74e-12 ***
## Q11 -393.9 352.5 -1.117 0.272
## Q21 238.6 352.5 0.677 0.503
## Q31 225.5 352.5 0.640 0.527
## Q41 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 747.8 on 32 degrees of freedom
## Multiple R-squared: 0.1163, Adjusted R-squared: 0.03346
## F-statistic: 1.404 on 3 and 32 DF, p-value: 0.2596
sea_add_pred<-data.frame(predict(sea_add_model,newdata=test,interval='predict'))
## Warning in predict.lm(sea_add_model, newdata = test, interval = "predict"):
## prediction from a rank-deficient fit may be misleading
rmse_sea_add<-sqrt(mean((test$Sales-sea_add_pred$fit)^2,na.rm = T))
rmse_sea_add # 1785.135
## [1] 1785.135
######################## Additive Seasonality with Linear #################
Add_sea_Linear_model<-lm(Sales~t+Q1+Q2+Q3+Q4,data=train)
summary(Add_sea_Linear_model)
##
## Call:
## lm(formula = Sales ~ t + Q1 + Q2 + Q3 + Q4, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -476.34 -157.05 -67.11 67.65 617.56
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1435.20 124.21 11.554 9.13e-13 ***
## t 63.89 4.32 14.788 1.37e-15 ***
## Q11 -202.21 126.86 -1.594 0.12110
## Q21 366.40 126.50 2.897 0.00686 **
## Q31 289.39 126.27 2.292 0.02887 *
## Q41 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 267.7 on 31 degrees of freedom
## Multiple R-squared: 0.8903, Adjusted R-squared: 0.8761
## F-statistic: 62.89 on 4 and 31 DF, p-value: 1.969e-14
Add_sea_Linear_pred<-data.frame(predict(Add_sea_Linear_model,interval='predict',newdata=test))
## Warning in predict.lm(Add_sea_Linear_model, interval = "predict", newdata =
## test): prediction from a rank-deficient fit may be misleading
rmse_Add_sea_Linear<-sqrt(mean((test$Sales-Add_sea_Linear_pred$fit)^2,na.rm=T))
rmse_Add_sea_Linear # 534.6979 and Adjusted R2 - 87.61
## [1] 534.6979
######################## Additive Seasonality with Quadratic #################
Add_sea_Quad_model<-lm(Sales~t+t_square+Q1+Q2+Q3+Q4,data=train)
summary(Add_sea_Quad_model)
##
## Call:
## lm(formula = Sales ~ t + t_square + Q1 + Q2 + Q3 + Q4, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -280.05 -134.63 28.75 93.79 341.09
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1919.0149 99.3368 19.318 < 2e-16 ***
## t -12.8328 10.6588 -1.204 0.238019
## t_square 2.0735 0.2793 7.423 2.85e-08 ***
## Q11 -202.2097 76.5664 -2.641 0.012999 *
## Q21 370.5450 76.3462 4.853 3.52e-05 ***
## Q31 293.5369 76.2125 3.852 0.000573 ***
## Q41 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 161.6 on 30 degrees of freedom
## Multiple R-squared: 0.9613, Adjusted R-squared: 0.9549
## F-statistic: 149.1 on 5 and 30 DF, p-value: < 2.2e-16
Add_sea_Quad_pred<-data.frame(predict(Add_sea_Quad_model,interval='predict',newdata=test))
## Warning in predict.lm(Add_sea_Quad_model, interval = "predict", newdata =
## test): prediction from a rank-deficient fit may be misleading
rmse_Add_sea_Quad<-sqrt(mean((test$Sales-Add_sea_Quad_pred$fit)^2,na.rm=T))
rmse_Add_sea_Quad # 236.7075 and Adjusted R2 - 95.49%
## [1] 236.7075
######################## Multiplicative Seasonality #########################
multi_sea_model<-lm(log_Sales~Q1+Q2+Q3+Q4,data = train)
summary(multi_sea_model)
##
## Call:
## lm(formula = log_Sales ~ Q1 + Q2 + Q3 + Q4, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.37206 -0.21908 -0.03649 0.21230 0.45120
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.87651 0.08845 89.052 <2e-16 ***
## Q11 -0.15985 0.12509 -1.278 0.210
## Q21 0.08161 0.12509 0.652 0.519
## Q31 0.07542 0.12509 0.603 0.551
## Q41 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2653 on 32 degrees of freedom
## Multiple R-squared: 0.1315, Adjusted R-squared: 0.05006
## F-statistic: 1.615 on 3 and 32 DF, p-value: 0.2053
multi_sea_pred<-data.frame(predict(multi_sea_model,newdata=test,interval='predict'))
## Warning in predict.lm(multi_sea_model, newdata = test, interval =
## "predict"): prediction from a rank-deficient fit may be misleading
rmse_multi_sea<-sqrt(mean((test$Sales-exp(multi_sea_pred$fit))^2,na.rm = T))
rmse_multi_sea # 1871.203
## [1] 1871.203
######################## Multiplicative Seasonality Linear trend ##########################
multi_add_sea_model<-lm(log_Sales~t+Q1+Q2+Q3+Q4,data = train)
summary(multi_add_sea_model)
##
## Call:
## lm(formula = log_Sales ~ t + Q1 + Q2 + Q3 + Q4, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.161001 -0.043058 -0.003783 0.033233 0.252528
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.417783 0.040219 184.434 < 2e-16 ***
## t 0.022936 0.001399 16.397 < 2e-16 ***
## Q11 -0.091041 0.041078 -2.216 0.03414 *
## Q21 0.127486 0.040958 3.113 0.00397 **
## Q31 0.098357 0.040887 2.406 0.02230 *
## Q41 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08668 on 31 degrees of freedom
## Multiple R-squared: 0.9102, Adjusted R-squared: 0.8986
## F-statistic: 78.56 on 4 and 31 DF, p-value: 9.001e-16
multi_add_sea_pred<-data.frame(predict(multi_add_sea_model,newdata=test,interval='predict'))
## Warning in predict.lm(multi_add_sea_model, newdata = test, interval =
## "predict"): prediction from a rank-deficient fit may be misleading
rmse_multi_add_sea<-sqrt(mean((test$Sales-exp(multi_add_sea_pred$fit))^2,na.rm = T))
rmse_multi_add_sea # 335.1026 and Adjusted R2 - 89.86%
## [1] 335.1026
# Preparing table on model and it's RMSE values
table_rmse<-data.frame(c("rmse_linear","rmse_expo","rmse_Quad","rmse_sea_add","rmse_Add_sea_Quad","rmse_multi_sea","rmse_multi_add_sea"),c(rmse_linear,rmse_expo,rmse_Quad,rmse_sea_add,rmse_Add_sea_Quad,rmse_multi_sea,rmse_multi_add_sea))
View(table_rmse)
colnames(table_rmse)<-c("model","RMSE")
View(table_rmse)
# Additive Seasonality with Quadratic trend has least RMSE value
new_model<-lm(Sales~t+t_square+Q1+Q2+Q3+Q4,data=CocacolaData)
new_model_pred<-data.frame(predict(new_model,newdata=CocacolaData,interval='predict'))
## Warning in predict.lm(new_model, newdata = CocacolaData, interval =
## "predict"): prediction from a rank-deficient fit may be misleading
new_model_fin <- new_model$fitted.values
View(new_model_fin)
# pred_res<- predict(arima(log_Passenger,order=c(1,0,0)),n.ahead = 12)
Quarter <- as.data.frame(CocacolaData$Quarter)
Final <- as.data.frame(cbind(Quarter,CocacolaData$Sales,new_model_fin))
colnames(Final) <-c("Quarter","Sales","New_Pred_Value")
plot(Final$Sales,main = "ActualGraph", xlab="Sales(Actual)", ylab="Quarter",
col.axis="blue",type="o")

plot(Final$New_Pred_Value, main = "PredictedGraph", xlab="Sales(Predicted)", ylab="Quarter",
col.axis="Green",type="s")

View(Final)
# plot(Final$new_model_fin,type="o")