library(readr)
Month_Value_1 <- read_csv("~/Downloads/Month_Value_1.csv")
## Rows: 96 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Period
## dbl (4): Revenue, Sales_quantity, Average_cost, The_average_annual_payroll_o...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
View(Month_Value_1)
library(ggplot2)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ purrr   0.3.5      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(timeSeries)
## Loading required package: timeDate
library(fpp2)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo 
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.4 ──
## ✔ forecast  8.19     ✔ expsmooth 2.3 
## ✔ fma       2.4
library(forecast)
head(Month_Value_1)
## # A tibble: 6 × 5
##   Period       Revenue Sales_quantity Average_cost The_average_annual_payroll_…¹
##   <chr>          <dbl>          <dbl>        <dbl>                         <dbl>
## 1 01.01.2015 16010072.          12729        1258.                      30024676
## 2 01.02.2015 15807587.          11636        1359.                      30024676
## 3 01.03.2015 22047146.          15922        1385.                      30024676
## 4 01.04.2015 18814583.          15227        1236.                      30024676
## 5 01.05.2015 14021480.           8620        1627.                      30024676
## 6 01.06.2015 16783929.          13160        1275.                      30024676
## # … with abbreviated variable name ¹​The_average_annual_payroll_of_the_region
str(Month_Value_1)
## spc_tbl_ [96 × 5] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Period                                  : chr [1:96] "01.01.2015" "01.02.2015" "01.03.2015" "01.04.2015" ...
##  $ Revenue                                 : num [1:96] 16010072 15807587 22047146 18814583 14021480 ...
##  $ Sales_quantity                          : num [1:96] 12729 11636 15922 15227 8620 ...
##  $ Average_cost                            : num [1:96] 1258 1359 1385 1236 1627 ...
##  $ The_average_annual_payroll_of_the_region: num [1:96] 3e+07 3e+07 3e+07 3e+07 3e+07 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Period = col_character(),
##   ..   Revenue = col_double(),
##   ..   Sales_quantity = col_double(),
##   ..   Average_cost = col_double(),
##   ..   The_average_annual_payroll_of_the_region = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
Month_Value_1%>%
  is.na()%>%
  sum()
## [1] 128
mydata<-na.omit(Month_Value_1)
  head(mydata)
## # A tibble: 6 × 5
##   Period       Revenue Sales_quantity Average_cost The_average_annual_payroll_…¹
##   <chr>          <dbl>          <dbl>        <dbl>                         <dbl>
## 1 01.01.2015 16010072.          12729        1258.                      30024676
## 2 01.02.2015 15807587.          11636        1359.                      30024676
## 3 01.03.2015 22047146.          15922        1385.                      30024676
## 4 01.04.2015 18814583.          15227        1236.                      30024676
## 5 01.05.2015 14021480.           8620        1627.                      30024676
## 6 01.06.2015 16783929.          13160        1275.                      30024676
## # … with abbreviated variable name ¹​The_average_annual_payroll_of_the_region
nrow(mydata)
## [1] 64
  mydata%>%
  is.na()%>%
  sum()
## [1] 0
#Declare revenue column as time series data
revenue_tseries<-ts(mydata[,2], start = c(2015,1), frequency = 12)
revenue_tseries
##           Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 2015 16010072 15807587 22047146 18814583 14021480 16783929 19161892 15204984
## 2016 28601586 22367074 29738609 28351008 15264604 24385658 29486517 15270117
## 2017 36007381 30396775 47678131 27013965 24948845 31101346 33848822 16454667
## 2018 44067521 36020287 46995990 35536488 29699599 33261065 35826535 23268655
## 2019 36459960 36546499 54198707 32743990 32531658 47709702 45992142 36933665
## 2020 56288301 40225243 50022165 52320693                                    
##           Sep      Oct      Nov      Dec
## 2015 20603940 20992875 14993370 27791808
## 2016 36141028 27915144 21272049 42014160
## 2017 31650093 31572206 22446371 44966126
## 2018 35423490 39831566 32999145 47221828
## 2019 48526260 44160416 36374956 58756474
## 2020

Check for trend and seasonality

#Time plot for checking trends
autoplot(revenue_tseries) +ggtitle("Time plot:Revenue by month") + ylab("dollars in millions")

-As shown in the above graph, there is a positive trend over time. There is regular patterns that are happening every year. We need to investigate more.

#first difference of the data to remove the trend
DY<-diff(revenue_tseries)
autoplot(DY) +ggtitle("Time plot:Change in revenue by month") + ylab("dollars in millions")

-We now removed the trend but we need to check for seasonality.

# checking for seasonality
ggseasonplot(DY) + ggtitle("Seasonal plot: Change in monthly revenue") + ylab("Dolars in millions")

-Seasonality is irregular. No montly partterns repeated.

-Series revenue_tseries has trend. I removed the trend by taking the first difference. Using the first difference data(DY), no seasonality pattern was detected.

-Test 3 different models to choose the best one for forecasting revenue for the next 3 years.

Model 1: benchmark method

#benchmark method to forecast
model1<-snaive(DY)
summary(model1)
## 
## Forecast method: Seasonal naive method
## 
## Model Information:
## Call: snaive(y = DY) 
## 
## Residual sd: 8068682.9697 
## 
## Error measures:
##                    ME    RMSE     MAE       MPE     MAPE MASE       ACF1
## Training set 136964.5 8068683 6476361 -58.12047 544.2507    1 -0.3608288
## 
## Forecasts:
##          Point Forecast       Lo 80    Hi 80       Lo 95      Hi 95
## May 2020      -212332.1 -10552765.4 10128101 -16026660.1 15601996.0
## Jun 2020     15178044.1   4837610.8 25518477   -636283.9 30992372.1
## Jul 2020     -1717560.1 -12057993.4  8622873 -17531888.1 14096768.0
## Aug 2020     -9058476.6 -19398909.8  1281957 -24872804.6  6755851.5
## Sep 2020     11592595.1   1252161.8 21933028  -4221732.9 27406923.1
## Oct 2020     -4365844.0 -14706277.2  5974589 -20180172.0 11448484.1
## Nov 2020     -7785459.7 -18125893.0  2554974 -23599787.7  8028868.3
## Dec 2020     22381517.2  12041083.9 32721950   6567189.1 38195845.2
## Jan 2021     -2468172.8 -12808606.1  7872261 -18282500.8 13346155.2
## Feb 2021    -16063057.6 -26403490.9 -5722624 -31877385.6  -248729.6
## Mar 2021      9796922.0   -543511.3 20137355  -6017406.1 25611250.0
## Apr 2021      2298527.7  -8041905.6 12638961 -13515800.3 18112855.7
## May 2021      -212332.1 -14835913.1 14411249 -22577169.2 22152505.1
## Jun 2021     15178044.1    554463.1 29801625  -7186793.1 37542881.3
## Jul 2021     -1717560.1 -16341141.1 12906021 -24082397.2 20647277.1
## Aug 2021     -9058476.6 -23682057.6  5565104 -31423313.7 13306360.6
## Sep 2021     11592595.1  -3030985.9 26216176 -10772242.1 33957432.3
## Oct 2021     -4365844.0 -18989425.0 10257737 -26730681.1 17998993.2
## Nov 2021     -7785459.7 -22409040.7  6838121 -30150296.9 14579377.5
## Dec 2021     22381517.2   7757936.2 37005098     16680.0 44746354.3
## Jan 2022     -2468172.8 -17091753.8 12155408 -24833010.0 19896664.4
## Feb 2022    -16063057.6 -30686638.6 -1439477 -38427894.8  6301779.6
## Mar 2022      9796922.0  -4826659.0 24420503 -12567915.2 32161759.1
## Apr 2022      2298527.7 -12325053.3 16922109 -20066309.5 24663364.9
checkresiduals(model1)

## 
##  Ljung-Box test
## 
## data:  Residuals from Seasonal naive method
## Q* = 42.356, df = 13, p-value = 5.726e-05
## 
## Model df: 0.   Total lags used: 13

-The residual sd of 8068682.9697 suggests how well our data is fitting. The average difference bewteen the values

of the current month to the values of the same month last year is $8068682.9697.

-The error term model, ACF, has a few data lagging outside the 95% confidence interval.

-We will try the next model and see if it can do a better a job.

Model 2:Exponential Smoothing (ETS) Algorithm:

#Fit ETS method
model2<-ets(revenue_tseries)
summary(model2)
## ETS(M,A,M) 
## 
## Call:
##  ets(y = revenue_tseries) 
## 
##   Smoothing parameters:
##     alpha = 0.0969 
##     beta  = 0.0059 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = 14900629.1102 
##     b = 653923.3572 
##     s = 1.2886 0.7598 0.994 1.0785 0.6882 1.0373
##            0.98 0.7657 1.0034 1.3125 0.947 1.145
## 
##   sigma:  0.1298
## 
##      AIC     AICc      BIC 
## 2227.768 2241.072 2264.469 
## 
## Training set error measures:
##                     ME    RMSE     MAE       MPE     MAPE      MASE       ACF1
## Training set -453610.2 3825696 2888750 -2.836156 9.383142 0.4062265 0.05576733
checkresiduals(model2)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(M,A,M)
## Q* = 19.384, df = 3, p-value = 0.0002277
## 
## Model df: 16.   Total lags used: 19

-We still have some issues with the ACF, but Residual sd: 0.1298 is a sign that this model could be a good

fit. There might be a better model out there though. Below is the final model.

Model 3: ARIMA model

model3<-auto.arima(revenue_tseries, d=1, D=1, stepwise = FALSE, approximation = FALSE,trace = TRUE)
## 
##  ARIMA(0,1,0)(0,1,0)[12]                    : 1768.97
##  ARIMA(0,1,0)(0,1,1)[12]                    : Inf
##  ARIMA(0,1,0)(1,1,0)[12]                    : 1753.67
##  ARIMA(0,1,0)(1,1,1)[12]                    : Inf
##  ARIMA(0,1,1)(0,1,0)[12]                    : 1744.455
##  ARIMA(0,1,1)(0,1,1)[12]                    : Inf
##  ARIMA(0,1,1)(1,1,0)[12]                    : 1734.879
##  ARIMA(0,1,1)(1,1,1)[12]                    : Inf
##  ARIMA(0,1,2)(0,1,0)[12]                    : 1746.705
##  ARIMA(0,1,2)(0,1,1)[12]                    : Inf
##  ARIMA(0,1,2)(1,1,0)[12]                    : 1737.082
##  ARIMA(0,1,2)(1,1,1)[12]                    : Inf
##  ARIMA(0,1,3)(0,1,0)[12]                    : 1746.241
##  ARIMA(0,1,3)(0,1,1)[12]                    : Inf
##  ARIMA(0,1,3)(1,1,0)[12]                    : 1739.091
##  ARIMA(0,1,3)(1,1,1)[12]                    : Inf
##  ARIMA(0,1,4)(0,1,0)[12]                    : Inf
##  ARIMA(0,1,4)(0,1,1)[12]                    : Inf
##  ARIMA(0,1,4)(1,1,0)[12]                    : 1737.389
##  ARIMA(0,1,5)(0,1,0)[12]                    : Inf
##  ARIMA(1,1,0)(0,1,0)[12]                    : 1762.503
##  ARIMA(1,1,0)(0,1,1)[12]                    : Inf
##  ARIMA(1,1,0)(1,1,0)[12]                    : 1745.729
##  ARIMA(1,1,0)(1,1,1)[12]                    : Inf
##  ARIMA(1,1,1)(0,1,0)[12]                    : 1746.709
##  ARIMA(1,1,1)(0,1,1)[12]                    : Inf
##  ARIMA(1,1,1)(1,1,0)[12]                    : 1737.11
##  ARIMA(1,1,1)(1,1,1)[12]                    : Inf
##  ARIMA(1,1,2)(0,1,0)[12]                    : 1746.763
##  ARIMA(1,1,2)(0,1,1)[12]                    : Inf
##  ARIMA(1,1,2)(1,1,0)[12]                    : Inf
##  ARIMA(1,1,2)(1,1,1)[12]                    : Inf
##  ARIMA(1,1,3)(0,1,0)[12]                    : 1747.537
##  ARIMA(1,1,3)(0,1,1)[12]                    : Inf
##  ARIMA(1,1,3)(1,1,0)[12]                    : 1740.82
##  ARIMA(1,1,4)(0,1,0)[12]                    : Inf
##  ARIMA(2,1,0)(0,1,0)[12]                    : 1744.139
##  ARIMA(2,1,0)(0,1,1)[12]                    : Inf
##  ARIMA(2,1,0)(1,1,0)[12]                    : 1735.026
##  ARIMA(2,1,0)(1,1,1)[12]                    : Inf
##  ARIMA(2,1,1)(0,1,0)[12]                    : 1745.629
##  ARIMA(2,1,1)(0,1,1)[12]                    : Inf
##  ARIMA(2,1,1)(1,1,0)[12]                    : 1737.377
##  ARIMA(2,1,1)(1,1,1)[12]                    : Inf
##  ARIMA(2,1,2)(0,1,0)[12]                    : 1745.916
##  ARIMA(2,1,2)(0,1,1)[12]                    : Inf
##  ARIMA(2,1,2)(1,1,0)[12]                    : 1739.764
##  ARIMA(2,1,3)(0,1,0)[12]                    : Inf
##  ARIMA(3,1,0)(0,1,0)[12]                    : 1746.112
##  ARIMA(3,1,0)(0,1,1)[12]                    : Inf
##  ARIMA(3,1,0)(1,1,0)[12]                    : 1737.401
##  ARIMA(3,1,0)(1,1,1)[12]                    : Inf
##  ARIMA(3,1,1)(0,1,0)[12]                    : Inf
##  ARIMA(3,1,1)(0,1,1)[12]                    : Inf
##  ARIMA(3,1,1)(1,1,0)[12]                    : 1739.644
##  ARIMA(3,1,2)(0,1,0)[12]                    : Inf
##  ARIMA(4,1,0)(0,1,0)[12]                    : 1747.157
##  ARIMA(4,1,0)(0,1,1)[12]                    : Inf
##  ARIMA(4,1,0)(1,1,0)[12]                    : 1739.827
##  ARIMA(4,1,1)(0,1,0)[12]                    : Inf
##  ARIMA(5,1,0)(0,1,0)[12]                    : 1749.724
## 
## 
## 
##  Best model: ARIMA(0,1,1)(1,1,0)[12]
summary(model3)
## Series: revenue_tseries 
## ARIMA(0,1,1)(1,1,0)[12] 
## 
## Coefficients:
##           ma1     sar1
##       -0.8051  -0.5628
## s.e.   0.1239   0.1349
## 
## sigma^2 = 2.847e+13:  log likelihood = -864.18
## AIC=1734.37   AICc=1734.88   BIC=1740.16
## 
## Training set error measures:
##                     ME    RMSE     MAE       MPE     MAPE      MASE       ACF1
## Training set -327961.6 4669060 3275548 -3.196471 10.09469 0.4606193 0.03397385
checkresiduals(model3)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,1)(1,1,0)[12]
## Q* = 13.303, df = 11, p-value = 0.274
## 
## Model df: 2.   Total lags used: 13

-If you look at the residuals, there seems a new constant line developed between 2015 and 2016. Violates the residuals randomness observed in the previous model. However, the ACF

model looks perfect.

-After comparing all the 3 models, Model 2 looks the best fit for the forecast.

Forecast with model 2, Exponential Smoothing (ETS) Algorithm

#Forecast with model 2, for the next 3 years.
fcst<-forecast(model2, h=36)
autoplot(fcst)

summary(fcst)
## 
## Forecast method: ETS(M,A,M)
## 
## Model Information:
## ETS(M,A,M) 
## 
## Call:
##  ets(y = revenue_tseries) 
## 
##   Smoothing parameters:
##     alpha = 0.0969 
##     beta  = 0.0059 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = 14900629.1102 
##     b = 653923.3572 
##     s = 1.2886 0.7598 0.994 1.0785 0.6882 1.0373
##            0.98 0.7657 1.0034 1.3125 0.947 1.145
## 
##   sigma:  0.1298
## 
##      AIC     AICc      BIC 
## 2227.768 2241.072 2264.469 
## 
## Error measures:
##                     ME    RMSE     MAE       MPE     MAPE      MASE       ACF1
## Training set -453610.2 3825696 2888750 -2.836156 9.383142 0.4062265 0.05576733
## 
## Forecasts:
##          Point Forecast    Lo 80     Hi 80    Lo 95     Hi 95
## May 2020       36542882 30462065  42623699 27243075  45842689
## Jun 2020       47241020 39338687  55143353 35155445  59326595
## Jul 2020       50502969 42006816  58999122 37509225  63496713
## Aug 2020       33837053 28109635  39564470 25077724  42596381
## Sep 2020       53540410 44418231  62662589 39589241  67491579
## Oct 2020       49826347 41277381  58375312 36751832  62900861
## Nov 2020       38450530 31804262  45096799 28285940  48615121
## Dec 2020       65828874 54360756  77296992 48289901  83367847
## Jan 2021       59040587 48670062  69411111 43180238  74900936
## Feb 2021       49285281 40553351  58017210 35930947  62639614
## Mar 2021       68936162 56612337  81259987 50088498  87783826
## Apr 2021       53180353 43583892  62776814 38503832  67856874
## May 2021       40949710 33488195  48411224 29538308  52361112
## Jun 2021       52881291 43148649  62613933 37996499  67766082
## Jul 2021       56473294 45971608  66974980 40412352  72534237
## Aug 2021       37798155 30694190  44902121 26933577  48662734
## Sep 2021       59747516 48395211  71099821 42385663  77109368
## Oct 2021       55547596 44874870  66220323 39225069  71870124
## Nov 2021       42823720 34501392  51146049 30095818  55551623
## Dec 2021       73245658 58844762  87646554 51221388  95269928
## Jan 2022       65630677 52573443  78687911 45661360  85599993
## Feb 2022       54735788 43714444  65757133 37880096  71591481
## Mar 2022       76490262 60899641  92080884 52646465 100334060
## Apr 2022       58955181 46789356  71121006 40349157  77561204
## May 2022       45356543 35879147  54833939 30862117  59850970
## Jun 2022       58521569 46138050  70905088 39582611  77460527
## Jul 2022       62443627 49060909  75826345 41976526  82910728
## Aug 2022       41759264 32694003  50824525 27895144  55623384
## Sep 2022       65954630 51450838  80458422 43772994  88136266
## Oct 2022       61268854 47619345  74918363 40393731  82143977
## Nov 2022       47196917 36544233  57849600 30905043  63488790
## Dec 2022       80662452 62216239  99108665 52451404 108873501
## Jan 2023       72220776 55486530  88955021 46627956  97813596
## Feb 2023       60186304 46055615  74316992 38575280  81797327
## Mar 2023       84044373 64050108 104038639 53465783 114622964
## Apr 2023       64730016 49125892  80334141 40865567  88594465

-The summary above is the company`s forecasted revenue for the next 3 years. In the summary output, There is a point forecast for

every month with 80% chance, and 95% confidence interval.

-Please note the original data set includes time length from April 2020 to 2022 but there are no values given. All

cells are NULLs. The forecasted revenue includes these years since I removed all the NULLs.

Thanks for taking the time to read!