This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
Loading data
library(readxl)
data <- read_excel("SalesData.xlsx", col_types = c ("date","numeric"))
head(data)
## # A tibble: 6 x 2
## Date Sales
## <dttm> <dbl>
## 1 2019-01-01 00:00:00 161590
## 2 2019-02-01 00:00:00 137118
## 3 2019-03-01 00:00:00 137764
## 4 2019-04-01 00:00:00 172188
## 5 2019-05-01 00:00:00 125908
## 6 2019-06-01 00:00:00 121280
str(data)
## tibble [44 x 2] (S3: tbl_df/tbl/data.frame)
## $ Date : POSIXct[1:44], format: "2019-01-01" "2019-02-01" ...
## $ Sales: num [1:44] 161590 137118 137764 172188 125908 ...
date <- as.character(data$Date)
range(date)
## [1] "2019-01-01" "2022-08-01"
Creating a time series
salests <- ts(data$Sales, start = c(2019,01), end = c(2022,08), frequency = 12)
plot(salests, xlab="Year", ylab = "Sales", main = "Sales Trend")
Decompose the data
plot(decompose(salests))
analysing autocorrelation
salespre <- window(salests, start = c(2019,01), end = c(2021,12), frequency= 12)
salespost <- window(salests, start = c(2022,01), end = c(2022,08), frequency= 12)
acf(salespre)
pacf(salespre)
Time <- time(salespre)
str(Time)
## Time-Series [1:36] from 2019 to 2022: 2019 2019 2019 2019 2019 ...
Seas <- factor(cycle(salespre))
str(Seas)
## Factor w/ 12 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
sales.lm <- lm(salespre ~ 0 + Time + Seas)
coef(sales.lm)
## Time Seas1 Seas2 Seas3 Seas4 Seas5
## 14034.67 -28175636.67 -28207626.89 -28211083.44 -28185340.00 -28220929.89
## Seas6 Seas7 Seas8 Seas9 Seas10 Seas11
## -28219746.78 -28184402.33 -28209618.22 -28208688.44 -28166854.33 -28211819.89
## Seas12
## -28207222.78
new.Time <- seq(2021.917+1/12, length = 12, by=1/12)
new.Time
## [1] 2022.000 2022.084 2022.167 2022.250 2022.334 2022.417 2022.500 2022.584
## [9] 2022.667 2022.750 2022.834 2022.917
new.Seas <- factor(c(1,2,3,4,5,6,7,8,9,10,11,12))
new.data <- data.frame(Time = new.Time, Seas = new.Seas)
new.data
## Time Seas
## 1 2022.000 1
## 2 2022.084 2
## 3 2022.167 3
## 4 2022.250 4
## 5 2022.334 5
## 6 2022.417 6
## 7 2022.500 7
## 8 2022.584 8
## 9 2022.667 9
## 10 2022.750 10
## 11 2022.834 11
## 12 2022.917 12
Predict the following 12 periods
predict.lm <- predict(sales.lm, new.data)
predict.lm
## 1 2 3 4 5 6 7 8
## 202464.0 171643.3 169356.3 196269.3 161849.0 164201.7 200715.7 176669.3
## 9 10 11 12
## 178768.7 221772.3 177976.3 183743.0
sales.lm.resid <- sales.lm$residuals
acf(sales.lm.resid)
pacf(sales.lm.resid)
Applying AIC to determine best ARMA model
best.order <- c(0, 0, 0)
best.aic <- Inf
for (i in 0:3) for (j in 0:3) {
fit.aic <- AIC(arima(sales.lm.resid, order = c(i, 0,
j)))
if (fit.aic < best.aic) {
best.order <- c(i, 0, j)
resid.best.arma <- arima(sales.lm.resid, order = best.order)
best.aic <- fit.aic
}}
resid.best.arma
##
## Call:
## arima(x = sales.lm.resid, order = best.order)
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2 intercept
## -0.1290 -0.4978 0.4446 1.0201 1.0000 -194.1257
## s.e. 0.1625 0.1275 0.1751 0.1253 0.1101 3296.5065
##
## sigma^2 estimated as 63796794: log likelihood = -377.01, aic = 768.03
best.order
## [1] 3 0 2
Applying Best ARMA model
resid.best.arma.pred <- predict(resid.best.arma, n.ahead = 12)
resid.best.arma.pred$pred
## Time Series:
## Start = 37
## End = 48
## Frequency = 1
## [1] -1001.82118 336.79164 -3142.94685 -437.17872 1541.14252 -1607.85140
## [7] -983.65496 1382.86638 -632.96824 -1273.53483 864.59963 11.56332
sales.pred <-ts((predict.lm + resid.best.arma.pred$pred), start = c(2022,1), freq = 12)
sales.pred
## Jan Feb Mar Apr May Jun Jul Aug
## 2022 201462.2 171980.1 166213.4 195832.2 163390.2 162593.8 199732.0 178052.2
## Sep Oct Nov Dec
## 2022 178135.7 220498.8 178840.9 183754.6
ts.plot(cbind(salespre, sales.pred), lty= 1:2)
ts.plot(salespost, sales.pred, lty= 1:12, col = c("red","blue"))
Model accuracy
mean(abs(salespost[1:8]-sales.pred[1:8])/salespost[1:8])*100
## [1] 7.375618