Plastic Sales Time Series Forecast

install.packages("rmarkdown",repos = "http://cran.us.r-project.org")
## Installing package into 'C:/Users/tswaminathan/Documents/R/win-library/3.5'
## (as 'lib' is unspecified)
## package 'rmarkdown' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\tswaminathan\AppData\Local\Temp\Rtmpu4w5f3\downloaded_packages
install.packages("forecast",repos = "http://cran.us.r-project.org")
## Installing package into 'C:/Users/tswaminathan/Documents/R/win-library/3.5'
## (as 'lib' is unspecified)
## package 'forecast' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\tswaminathan\AppData\Local\Temp\Rtmpu4w5f3\downloaded_packages
install.packages("fpp",repos = "http://cran.us.r-project.org")
## Installing package into 'C:/Users/tswaminathan/Documents/R/win-library/3.5'
## (as 'lib' is unspecified)
## package 'fpp' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\tswaminathan\AppData\Local\Temp\Rtmpu4w5f3\downloaded_packages
install.packages("smooth",repos = "http://cran.us.r-project.org")
## Installing package into 'C:/Users/tswaminathan/Documents/R/win-library/3.5'
## (as 'lib' is unspecified)
## package 'smooth' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\tswaminathan\AppData\Local\Temp\Rtmpu4w5f3\downloaded_packages
install.packages("readxl",repos = "http://cran.us.r-project.org")
## Installing package into 'C:/Users/tswaminathan/Documents/R/win-library/3.5'
## (as 'lib' is unspecified)
## package 'readxl' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\tswaminathan\AppData\Local\Temp\Rtmpu4w5f3\downloaded_packages
library(forecast)
library(fpp)
## Loading required package: fma
## Loading required package: expsmooth
## Loading required package: lmtest
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: tseries
library(smooth)
## Loading required package: greybox
## Package "greybox", v0.3.3 loaded.
## This is package "smooth", v2.4.7
library(readxl)

Plastics<-read.csv(file.choose()) # read the Plastic  Data
View(Plastics) # Seasonality 12 months 
windows()
plot(Plastics$Sales,type="o")

# So creating 12 dummy variables 

X<- data.frame(outer(rep(month.abb,length = 60), month.abb,"==") + 0 )# Creating dummies for 12 months
View(X)

colnames(X)<-month.abb # Assigning month names 
View(X)
Plasticsdata<-cbind(Plastics,X)
View(Plastics)
colnames(Plastics)
## [1] "Month" "Sales"
Plasticsdata["t"]<- 1:60
View(Plasticsdata)
Plasticsdata["log_Sales"]<-log(Plasticsdata["Sales"])
Plasticsdata["t_square"]<-Plasticsdata["t"]*Plasticsdata["t"]
attach(Plasticsdata)

train<-Plasticsdata[1:48,]

test<-Plasticsdata[49:60,]

########################### LINEAR MODEL #############################

linear_model<-lm(Sales~t,data=train)
summary(linear_model)
## 
## Call:
## lm(formula = Sales ~ t, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -403.95 -192.19   13.41  206.68  274.40 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  863.109     62.460  13.819  < 2e-16 ***
## t             10.575      2.219   4.765 1.92e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 213 on 46 degrees of freedom
## Multiple R-squared:  0.3305, Adjusted R-squared:  0.3159 
## F-statistic: 22.71 on 1 and 46 DF,  p-value: 1.925e-05
linear_pred<-data.frame(predict(linear_model,interval='predict',newdata =test))
View(linear_pred)
rmse_linear<-sqrt(mean((test$Sales-linear_pred$fit)^2,na.rm = T))
rmse_linear # 260.9378 and Adjusted R2 Value = 31.50
## [1] 260.9378
######################### Exponential #################################

expo_model<-lm(log_Sales~t,data=train)
summary(expo_model)
## 
## Call:
## lm(formula = log_Sales ~ t, data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.36696 -0.17856  0.02323  0.17933  0.26472 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 6.762209   0.058119 116.351  < 2e-16 ***
## t           0.009549   0.002065   4.624 3.07e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1982 on 46 degrees of freedom
## Multiple R-squared:  0.3173, Adjusted R-squared:  0.3025 
## F-statistic: 21.38 on 1 and 46 DF,  p-value: 3.066e-05
expo_pred<-data.frame(predict(expo_model,interval='predict',newdata=test))
rmse_expo<-sqrt(mean((test$Sales-exp(expo_pred$fit))^2,na.rm = T))
rmse_expo # 268.6938  and Adjusted R2 - 30.25 %
## [1] 268.6938
######################### Quadratic ####################################

Quad_model<-lm(Sales~t+t_square,data=train)
summary(Quad_model)
## 
## Call:
## lm(formula = Sales ~ t + t_square, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -403.04 -199.43    3.02  211.08  290.41 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 901.20195   96.98668   9.292 4.93e-12 ***
## t             6.00348    9.13067   0.658    0.514    
## t_square      0.09329    0.18066   0.516    0.608    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 214.7 on 45 degrees of freedom
## Multiple R-squared:  0.3344, Adjusted R-squared:  0.3048 
## F-statistic: 11.31 on 2 and 45 DF,  p-value: 0.0001052
Quad_pred<-data.frame(predict(Quad_model,interval='predict',newdata=test))
rmse_Quad<-sqrt(mean((test$Sales-Quad_pred$fit)^2,na.rm=T))
rmse_Quad # 297.4067 and Adjusted R2 - 30.48%
## [1] 297.4067
######################### Additive Seasonality #########################

sea_add_model<-lm(Sales~Jan+Feb+Mar+Apr+May+Jun+Jul+Aug+Sep+Oct+Nov+Dec,data=train)
summary(sea_add_model)
## 
## Call:
## lm(formula = Sales ~ Jan + Feb + Mar + Apr + May + Jun + Jul + 
##     Aug + Sep + Oct + Nov + Dec, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -239.00  -80.12  -14.50   85.94  250.50 
## 
## Coefficients: (1 not defined because of singularities)
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   979.00      70.70  13.847 5.44e-16 ***
## Jan          -146.50      99.99  -1.465 0.151543    
## Feb          -216.25      99.99  -2.163 0.037275 *  
## Mar          -135.75      99.99  -1.358 0.183010    
## Apr            19.50      99.99   0.195 0.846467    
## May           172.75      99.99   1.728 0.092604 .  
## Jun           290.50      99.99   2.905 0.006236 ** 
## Jul           332.00      99.99   3.320 0.002068 ** 
## Aug           410.00      99.99   4.101 0.000225 ***
## Sep           427.50      99.99   4.276 0.000134 ***
## Oct           391.00      99.99   3.911 0.000391 ***
## Nov           173.50      99.99   1.735 0.091251 .  
## Dec               NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 141.4 on 36 degrees of freedom
## Multiple R-squared:  0.7691, Adjusted R-squared:  0.6985 
## F-statistic:  10.9 on 11 and 36 DF,  p-value: 1.84e-08
sea_add_pred<-data.frame(predict(sea_add_model,newdata=test,interval='predict'))
## Warning in predict.lm(sea_add_model, newdata = test, interval = "predict"):
## prediction from a rank-deficient fit may be misleading
rmse_sea_add<-sqrt(mean((test$Sales-sea_add_pred$fit)^2,na.rm = T))
rmse_sea_add # 235.6027 and Adjusted R2 Value = 69.85
## [1] 235.6027
######################## Additive Seasonality with Linear #################

Add_sea_Linear_model<-lm(Sales~t+Jan+Feb+Mar+Apr+May+Jun+Jul+Aug+Sep+Oct+Nov+Dec,data=train)
summary(Add_sea_Linear_model)
## 
## Call:
## lm(formula = Sales ~ t + Jan + Feb + Mar + Apr + May + Jun + 
##     Jul + Aug + Sep + Oct + Nov + Dec, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -84.388 -28.256  -6.037  16.319  95.887 
## 
## Coefficients: (1 not defined because of singularities)
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  721.3125    28.8624  24.991  < 2e-16 ***
## t              8.5896     0.5218  16.463  < 2e-16 ***
## Jan          -52.0146    34.7706  -1.496 0.143634    
## Feb         -130.3542    34.6883  -3.758 0.000625 ***
## Mar          -58.4437    34.6137  -1.688 0.100215    
## Apr           88.2167    34.5468   2.554 0.015177 *  
## May          232.8771    34.4876   6.752 7.99e-08 ***
## Jun          342.0375    34.4363   9.932 1.01e-11 ***
## Jul          374.9479    34.3928  10.902 8.44e-13 ***
## Aug          444.3583    34.3571  12.934 6.80e-15 ***
## Sep          453.2688    34.3294  13.204 3.72e-15 ***
## Oct          408.1792    34.3095  11.897 7.47e-14 ***
## Nov          182.0896    34.2976   5.309 6.29e-06 ***
## Dec                NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 48.5 on 35 degrees of freedom
## Multiple R-squared:  0.9736, Adjusted R-squared:  0.9645 
## F-statistic: 107.5 on 12 and 35 DF,  p-value: < 2.2e-16
Add_sea_Linear_pred<-data.frame(predict(Add_sea_Linear_model,interval='predict',newdata=test))
## Warning in predict.lm(Add_sea_Linear_model, interval = "predict", newdata =
## test): prediction from a rank-deficient fit may be misleading
rmse_Add_sea_Linear<-sqrt(mean((test$Sales-Add_sea_Linear_pred$fit)^2,na.rm=T))
rmse_Add_sea_Linear # 135.5536 and Adjusted R2 - 96.45%
## [1] 135.5536
######################## Additive Seasonality with Quadratic #################

Add_sea_Quad_model<-lm(Sales~t+t_square+Jan+Feb+Mar+Apr+May+Jun+Jul+Aug+Sep+Oct+Nov+Dec,data=train)
summary(Add_sea_Quad_model)
## 
## Call:
## lm(formula = Sales ~ t + t_square + Jan + Feb + Mar + Apr + May + 
##     Jun + Jul + Aug + Sep + Oct + Nov + Dec, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -87.014 -25.254   4.075  25.251  52.423 
## 
## Coefficients: (1 not defined because of singularities)
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  778.21577   26.68286  29.165  < 2e-16 ***
## t              1.44020    1.67520   0.860   0.3960    
## t_square       0.14591    0.03308   4.410 9.85e-05 ***
## Jan          -52.01458   28.13679  -1.849   0.0732 .  
## Feb         -128.89511   28.07214  -4.592 5.78e-05 ***
## Mar          -55.81745   28.01612  -1.992   0.0544 .  
## Apr           91.71841   27.96692   3.280   0.0024 ** 
## May          236.96245   27.92315   8.486 6.54e-10 ***
## Jun          346.41467   27.88389  12.423 3.42e-14 ***
## Jul          379.32509   27.84871  13.621 2.50e-15 ***
## Aug          448.44370   27.81761  16.121  < 2e-16 ***
## Sep          456.77049   27.79107  16.436  < 2e-16 ***
## Oct          410.80547   27.77007  14.793 2.24e-16 ***
## Nov          183.54864   27.75602   6.613 1.39e-07 ***
## Dec                 NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 39.25 on 34 degrees of freedom
## Multiple R-squared:  0.9832, Adjusted R-squared:  0.9768 
## F-statistic: 153.1 on 13 and 34 DF,  p-value: < 2.2e-16
Add_sea_Quad_pred<-data.frame(predict(Add_sea_Quad_model,interval='predict',newdata=test))
## Warning in predict.lm(Add_sea_Quad_model, interval = "predict", newdata =
## test): prediction from a rank-deficient fit may be misleading
rmse_Add_sea_Quad<-sqrt(mean((test$Sales-Add_sea_Quad_pred$fit)^2,na.rm=T))
rmse_Add_sea_Quad # 218.1939 and Adjusted R2 - 97.68 %
## [1] 218.1939
######################## Multiplicative Seasonality #########################

multi_sea_model<-lm(log_Sales~Jan+Feb+Mar+Apr+May+Jun+Jul+Aug+Sep+Oct+Nov+Dec,data = train)
summary(multi_sea_model)
## 
## Call:
## lm(formula = log_Sales ~ Jan + Feb + Mar + Apr + May + Jun + 
##     Jul + Aug + Sep + Oct + Nov + Dec, data = train)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.210538 -0.083185 -0.007374  0.085859  0.223878 
## 
## Coefficients: (1 not defined because of singularities)
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.87367    0.06189 111.070  < 2e-16 ***
## Jan         -0.15547    0.08752  -1.776 0.084122 .  
## Feb         -0.24072    0.08752  -2.751 0.009252 ** 
## Mar         -0.13990    0.08752  -1.599 0.118663    
## Apr          0.02883    0.08752   0.329 0.743730    
## May          0.17203    0.08752   1.966 0.057098 .  
## Jun          0.26838    0.08752   3.067 0.004094 ** 
## Jul          0.30112    0.08752   3.441 0.001486 ** 
## Aug          0.35865    0.08752   4.098 0.000226 ***
## Sep          0.36963    0.08752   4.223 0.000156 ***
## Oct          0.34059    0.08752   3.892 0.000413 ***
## Nov          0.16661    0.08752   1.904 0.064970 .  
## Dec               NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1238 on 36 degrees of freedom
## Multiple R-squared:  0.7916, Adjusted R-squared:  0.728 
## F-statistic: 12.43 on 11 and 36 DF,  p-value: 3.264e-09
multi_sea_pred<-data.frame(predict(multi_sea_model,newdata=test,interval='predict'))
## Warning in predict.lm(multi_sea_model, newdata = test, interval =
## "predict"): prediction from a rank-deficient fit may be misleading
rmse_multi_sea<-sqrt(mean((test$Sales-exp(multi_sea_pred$fit))^2,na.rm = T))
rmse_multi_sea # 239.6543
## [1] 239.6543
######################## Multiplicative Seasonality Linear trend ##########################

multi_add_sea_model<-lm(log_Sales~t+Jan+Feb+Mar+Apr+May+Jun+Jul+Aug+Sep+Oct+Nov+Dec,data = train)
summary(multi_add_sea_model) 
## 
## Call:
## lm(formula = log_Sales ~ t + Jan + Feb + Mar + Apr + May + Jun + 
##     Jul + Aug + Sep + Oct + Nov + Dec, data = train)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.073269 -0.021672  0.001997  0.020757  0.086610 
## 
## Coefficients: (1 not defined because of singularities)
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.6448891  0.0222743 298.320  < 2e-16 ***
## t            0.0076260  0.0004027  18.939  < 2e-16 ***
## Jan         -0.0715828  0.0268339  -2.668  0.01149 *  
## Feb         -0.1644641  0.0267704  -6.144 5.00e-07 ***
## Mar         -0.0712705  0.0267128  -2.668  0.01148 *  
## Apr          0.0898412  0.0266612   3.370  0.00184 ** 
## May          0.2254098  0.0266155   8.469 5.42e-10 ***
## Jun          0.3141391  0.0265759  11.820 8.96e-14 ***
## Jul          0.3392452  0.0265423  12.781 9.60e-15 ***
## Aug          0.3891559  0.0265148  14.677  < 2e-16 ***
## Sep          0.3925089  0.0264934  14.815  < 2e-16 ***
## Oct          0.3558417  0.0264781  13.439 2.21e-15 ***
## Nov          0.1742362  0.0264689   6.583 1.33e-07 ***
## Dec                 NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03743 on 35 degrees of freedom
## Multiple R-squared:  0.9815, Adjusted R-squared:  0.9751 
## F-statistic: 154.5 on 12 and 35 DF,  p-value: < 2.2e-16
multi_add_sea_pred<-data.frame(predict(multi_add_sea_model,newdata=test,interval='predict'))
## Warning in predict.lm(multi_add_sea_model, newdata = test, interval =
## "predict"): prediction from a rank-deficient fit may be misleading
rmse_multi_add_sea<-sqrt(mean((test$Sales-exp(multi_add_sea_pred$fit))^2,na.rm = T))
rmse_multi_add_sea # 160.6833 and Adjusted R2 - 97.51%
## [1] 160.6833
# Preparing table on model and it's RMSE values 

table_rmse<-data.frame(c("rmse_linear","rmse_expo","rmse_Quad","rmse_sea_add","rmse_Add_sea_Quad","rmse_multi_sea","rmse_multi_add_sea"),c(rmse_linear,rmse_expo,rmse_Quad,rmse_sea_add,rmse_Add_sea_Quad,rmse_multi_sea,rmse_multi_add_sea))
View(table_rmse)
colnames(table_rmse)<-c("model","RMSE")
View(table_rmse)

# Multiplicative Seasonality Linear trend  has least RMSE value

new_model<-lm(log_Sales~t+Jan+Feb+Mar+Apr+May+Jun+Jul+Aug+Sep+Oct+Nov+Dec,data = Plasticsdata)
new_model_pred<-data.frame(predict(new_model,newdata=Plasticsdata,interval='predict'))
## Warning in predict.lm(new_model, newdata = Plasticsdata, interval =
## "predict"): prediction from a rank-deficient fit may be misleading
new_model_fin <- exp(new_model$fitted.values)

View(new_model_fin)

Month <- as.data.frame(Plasticsdata$Month)

Final <- as.data.frame(cbind(Month,Plasticsdata$Sales, new_model_fin))
colnames(Final) <-c("Month","Sales","New_Pred_Value")
plot(Final$Sales,main = "ActualGraph", xlab="Sales(Actual)", ylab="Months",
     col.axis="blue",type="o") 

plot(Final$New_Pred_Value, main = "PredictedGraph", xlab="Sales(Predicted)", ylab="Months",
     col.axis="Green",type="s")

View(Final)


# plot(Final$new_model_fin,type="o")