SALES FORECASTING WITH PROPHET



PROPHET WEB PAGE




CREATING DATA FRAME FOR USE IN PROPHET FUNCTION AND THEN CREATING PLOT GRAPHS FOR VISUAL OUTPUT

First forecast attempt

library(prophet)
## Warning: package 'prophet' was built under R version 3.6.1
## Loading required package: Rcpp
## Loading required package: rlang
## Warning: package 'rlang' was built under R version 3.6.1
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.1
library(tidyr)
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 3.6.1
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.1
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plyr)
## Warning: package 'plyr' was built under R version 3.6.1
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
library(readxl)
## Warning: package 'readxl' was built under R version 3.6.1
library(grid)
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 3.6.1
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
df<-read_excel("C:\\Users\\johnsuh\\Documents\\storeb.xlsx")
names(df) <- c("ds", "y")
m <- prophet(df)
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
future <- make_future_dataframe(m, periods = 365)
forecast <- predict(m, future)
plot(m, forecast,xlabel = "Time Period", ylabel = "Sales Forecast 1st Attempt")

prophet_plot_components(m, forecast)

dyplot.prophet(m, forecast)
## Registered S3 method overwritten by 'xts':
##   method     from
##   as.zoo.xts zoo

Write to csv for SQL import

forecast<-subset(forecast,select = c("ds","yhat","yhat_lower","yhat_upper"))
write.csv(forecast,"C:\\Users\\johnsuh\\Documents\\forecasta.csv", row.names = TRUE)


SQL CODE TO CREATE DATA FOR IMPORT INTO R FOR ANALYSIS



wkperresults<-read.csv("C:\\Users\\johnsuh\\Documents\\resultsperwk2.csv", sep=",")
wkperresults<-subset(wkperresults,select = c("WkPeriodActSales","WkPeriodFCSales","Yr","WkPeriod","WkPeriodVar"))
wkperresults
##    WkPeriodActSales WkPeriodFCSales   Yr WkPeriod WkPeriodVar
## 1            241000          239000 2019        1       -2000
## 2            295000          229000 2019        2      -66000
## 3            323000          216000 2019        3     -107000
## 4            198000          210000 2019        4       12000
## 5            190000          212000 2019        5       22000
## 6            184000          213000 2019        6       29000
## 7            171000          211000 2019        7       40000
## 8            169000          211000 2019        8       42000
## 9            184000          216000 2019        9       32000
## 10           311000          218000 2019       10      -93000
## 11           222000          213000 2019       11       -9000
## 12           171000          201000 2019       12       30000
## 13           153000          196000 2019       13       43000
## 14           232000          202000 2019       14      -30000
## 15           260000          214000 2019       15      -46000
## 16           189000          216000 2019       16       27000
## 17           153000          207000 2019       17       54000
## 18           197000          198000 2019       18        1000
## 19           273000          202000 2019       19      -71000
## 20           194000          216000 2019       20       22000
## 21           183000          227000 2019       21       44000
## 22           184000          225000 2019       22       41000
## 23           259000          218000 2019       23      -41000
## 24           276000          220000 2019       24      -56000
## 25           186000          233000 2019       25       47000
## 26           172000          247000 2019       26       75000
## 27           312000          249000 2019       27      -63000
## 28           285000          239000 2019       28      -46000
## 29           216000          227000 2019       29       11000
## 30           181000          223000 2019       30       42000
## 31           204000          225000 2019       31       21000
## 32           288000          226000 2019       32      -62000
## 33           244000          224000 2019       33      -20000
## 34           181000          222000 2019       34       41000
## 35           180000          223000 2019       35       43000
## 36           249000          221000 2019       36      -28000
## 37           283000          214000 2019       37      -69000
## 38           195000          206000 2019       38       11000
## 39           158000          204000 2019       39       46000
wkperresults2<- gather(wkperresults, type, sales, WkPeriodActSales:WkPeriodFCSales) 
plot1 <- ggplot(wkperresults2, aes(WkPeriod, sales, fill=type))
plot1 <- plot1 + geom_bar(stat = "identity", position = 'dodge', colour="black")
plot1 <- plot1 + guides(fill=guide_legend(title=NULL))
plot1 <- plot1 + ggtitle("SalesFC1")
plot1 <- plot1 + theme_fivethirtyeight() + scale_fill_fivethirtyeight() 
plot1 <- plot1 + labs(x="",y="")
plot1

hist1<-hist(wkperresults$WkPeriodVar,main="SalesFC1")

hist1
## $breaks
##  [1] -120000 -100000  -80000  -60000  -40000  -20000       0   20000
##  [9]   40000   60000   80000
## 
## $counts
##  [1]  1  1  5  4  3  2  4  8 10  1
## 
## $density
##  [1] 1.282051e-06 1.282051e-06 6.410256e-06 5.128205e-06 3.846154e-06
##  [6] 2.564103e-06 5.128205e-06 1.025641e-05 1.282051e-05 1.282051e-06
## 
## $mids
##  [1] -110000  -90000  -70000  -50000  -30000  -10000   10000   30000
##  [9]   50000   70000
## 
## $xname
## [1] "wkperresults$WkPeriodVar"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
ggplot1<-ggplot(wkperresults, aes(x = WkPeriod, y = WkPeriodVar)) + 
     geom_line(color = "#00AFBB", size = 1)
ggplot1


POSSIBLE WAYS TO IMPROVE FORECASTING IN PROPHET BY ADDING HOLIDAY EVENTS




#### Second forecast attempt

df<-read_excel("C:\\Users\\johnsuh\\Documents\\storeb.xlsx")
h1<-read_excel("C:\\Users\\johnsuh\\Documents\\DataForR\\holiday1.xlsx")
h2<-read_excel("C:\\Users\\johnsuh\\Documents\\DataForR\\holiday2.xlsx")
names(df) <- c("ds", "y")
holidays1<-data.frame(holiday='holiday1',ds=as.Date(h1$Date1))
holidays2<-data.frame(holiday='holiday2',
                      ds=as.Date(h2$Date1),
                      lower_window=-3,upper_window=0)
holidays<-bind_rows(holidays1,holidays2)
## Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector

## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
m <- prophet(df,holidays=holidays,holidays.prior.scale = 0.05)
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
future <- make_future_dataframe(m, periods = 365)
forecast <- predict(m, future)
plot(m, forecast,xlabel = "Time Period", ylabel = "Sales Forecast 2nd Attempt")

prophet_plot_components(m, forecast)

dyplot.prophet(m, forecast)
forecast<-subset(forecast,select = c("ds","yhat","yhat_lower","yhat_upper"))
write.csv(forecast,"C:\\Users\\johnsuh\\Documents\\forecastb.csv", row.names = TRUE)
wkperresults<-read.csv("C:\\Users\\johnsuh\\Documents\\resultsperwk3.csv", sep=",")
wkperresults<-subset(wkperresults,select = c("WkPeriodActSales","WkPeriodFCSales","Yr","WkPeriod","WkPeriodVar"))
wkperresults
##    WkPeriodActSales WkPeriodFCSales   Yr WkPeriod WkPeriodVar
## 1            241000          260000 2019        1       19000
## 2            295000          265000 2019        2      -30000
## 3            323000          179000 2019        3     -144000
## 4            198000          195000 2019        4       -3000
## 5            190000          209000 2019        5       19000
## 6            184000          259000 2019        6       75000
## 7            171000          198000 2019        7       27000
## 8            169000          185000 2019        8       16000
## 9            184000          207000 2019        9       23000
## 10           311000          267000 2019       10      -44000
## 11           222000          209000 2019       11      -13000
## 12           171000          173000 2019       12        2000
## 13           153000          168000 2019       13       15000
## 14           232000          255000 2019       14       23000
## 15           260000          235000 2019       15      -25000
## 16           189000          181000 2019       16       -8000
## 17           153000          174000 2019       17       21000
## 18           197000          224000 2019       18       27000
## 19           273000          255000 2019       19      -18000
## 20           194000          183000 2019       20      -11000
## 21           183000          190000 2019       21        7000
## 22           184000          218000 2019       22       34000
## 23           259000          271000 2019       23       12000
## 24           276000          231000 2019       24      -45000
## 25           186000          195000 2019       25        9000
## 26           172000          198000 2019       26       26000
## 27           312000          324000 2019       27       12000
## 28           285000          259000 2019       28      -26000
## 29           216000          190000 2019       29      -26000
## 30           181000          194000 2019       30       13000
## 31           204000          237000 2019       31       33000
## 32           288000          277000 2019       32      -11000
## 33           244000          207000 2019       33      -37000
## 34           181000          192000 2019       34       11000
## 35           180000          192000 2019       35       12000
## 36           249000          269000 2019       36       20000
## 37           283000          246000 2019       37      -37000
## 38           195000          170000 2019       38      -25000
## 39           158000          170000 2019       39       12000
wkperresults2<- gather(wkperresults, type, sales, WkPeriodActSales:WkPeriodFCSales) 
plot2 <- ggplot(wkperresults2, aes(WkPeriod, sales, fill=type))
plot2 <- plot2 + geom_bar(stat = "identity", position = 'dodge', colour="black")
plot2 <- plot2 + guides(fill=guide_legend(title=NULL))
plot2 <- plot2 + ggtitle("SalesFC2")
plot2 <- plot2 + theme_fivethirtyeight() + scale_fill_fivethirtyeight() 
plot2 <- plot2 + labs(x="",y="")
plot2

hist2<-hist(wkperresults$WkPeriodVar,main="SalesFC2")

hist2
## $breaks
## [1] -150000 -100000  -50000       0   50000  100000
## 
## $counts
## [1]  1  0 15 22  1
## 
## $density
## [1] 5.128205e-07 0.000000e+00 7.692308e-06 1.128205e-05 5.128205e-07
## 
## $mids
## [1] -125000  -75000  -25000   25000   75000
## 
## $xname
## [1] "wkperresults$WkPeriodVar"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
ggplot2<-ggplot(wkperresults, aes(x = WkPeriod, y = WkPeriodVar)) + 
     geom_line(color = "#00AFBB", size = 1)
ggplot2

CHART COMPARISONS OF BOTH FORECASTS


grid.arrange(plot1, plot2, ncol = 2)

ggplot1

ggplot2