library(tidyverse)
## -- Attaching packages ---------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
library(ridge)
library(glmnet)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Loaded glmnet 4.0-2
library(ggplot2)
library(caTools)
library(fpp2)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## -- Attaching packages ----------------------------------------------------------------------------------------------------------- fpp2 2.4 --
## v forecast 8.13 v expsmooth 2.3
## v fma 2.4
## -- Conflicts -------------------------------------------------------------------------------------------------------------- fpp2_conflicts --
## x caret::lift() masks purrr::lift()
library(dplyr)
library(rcompanion)
##
## Attaching package: 'rcompanion'
## The following object is masked from 'package:forecast':
##
## accuracy
#Data
data <- read.csv("C:/Users/Icarus/Documents/R/Alcohol_Sales.csv")
#Working Directory
setwd("C:/Users/Icarus/Documents/R/")
#Viewing the alcohol sales data and making general observations:
View(data)
dim(data)
## [1] 325 2
nrow(data)
## [1] 325
ncol(data)
## [1] 2
#There are 325 rows and 2 columns of alcohol sales data.
#The data is monthly sales quantity from 1992 to 2019.
#Let's take a look for any missing values or outliers:
missingdata <- sum(is.na(data))
print(missingdata)
## [1] 0
sum(as.integer(data$Sell.Quantity>quantile(data$Sell.Quantity, probs = 0.95)))
## [1] 17
#This says there are 17 outliers in our data set.
#Setting up a single vector with alcohol sales:
alcsales <- data$Sell.Quantity
is.ts(alcsales)
## [1] FALSE
#is.ts function returned false, so we have verified this numeric data is not a time series.
#Setting up a time series with the sales data:
as.ts <- ts(alcsales, start=1992, frequency=12)
is.ts(as.ts)
## [1] TRUE
#After setting up the data with the ts function and entering start/frequency,
#we now have a time series. is.ts returns TRUE.
#Plotting our time series to begin initial observations:
plot(as.ts)
autoplot(as.ts)
#As you can see, the x axis represents time (yrs) and the y represents sale quantity.
#Looking at a simple plot/autoplot, we can see there is an upward trend with sales
#increasing over time. It also appears as if there may be seasonality. We'll
#have to look closer for a more accurate observation.
#Checking our mean, median, and standard deviation of alcohol sales.
summary(as.ts)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3031 5231 7481 7886 9977 15504
#Mean is 7886 and Median is 7481. A mean higher than the median can indicate
#that the data is right skewed, with checks with our initial observation.
sd(as.ts)
## [1] 2914.269
#Our sd is 2914, a higher number, expected considering the spread of values.
#Let's take a loot as some other visualizations:
hist(as.ts, prob=TRUE)
#This histogram shows the amount of sales in a given range, indicated by the bins.
#To get a more refined look:
alcohol.hist <- ggplot(data, aes(DATE, Sell.Quantity)) + geom_bar(stat="identity", na.rm=TRUE) +
xlab("dates") + ylab("sales")
alcohol.hist
#This histogram has an almost indistinguishable bin size, but shows the stready rises
#prior to the falls in sales, expressing what may be seasonality.
#Let's check our normal distribution on the standard histogram with rcompanion:
plotNormalHistogram(as.ts)
#Let's also take a look at the sales distribution with a boxplot:
boxplot(data$Sell.Quantity, main="SALES", sub=paste("Outlier rows: ", boxplot.stats(data$Sell.Quantity)$out))
#Let's take a closer look at the pattern in the alcohol sales data.
#First, we have to set up a trend to use in a linear model:
observations = nrow(data)
observations
## [1] 325
trend = seq(1,observations,1)
#Let's run our linear regression.
#The function lm takes two main agruments, formula and data.
#Since we're just using our trend, we'll use the time series to the trend seq:
lm(as.ts ~ trend)
##
## Call:
## lm(formula = as.ts ~ trend)
##
## Coefficients:
## (Intercept) trend
## 3137.48 29.13
#We have an intercept of 3137 and a trend of 29.13.
#The trend, or coeffecient of time in this case, means that time has a positive
#impact on sales quantity. Let's ensure we have a statistically significant
#model prior to moving forward:
fit.sales <- lm(formula = as.ts ~ trend, data = as.ts)
summary(fit.sales)
##
## Call:
## lm(formula = as.ts ~ trend, data = as.ts)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2909.34 -459.48 22.37 614.69 2926.95
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3137.4819 111.2915 28.19 <2e-16 ***
## trend 29.1345 0.5917 49.23 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1001 on 323 degrees of freedom
## Multiple R-squared: 0.8824, Adjusted R-squared: 0.8821
## F-statistic: 2424 on 1 and 323 DF, p-value: < 2.2e-16
#Our equation here for estimated sales is
#3137.48 + 29.13 + ε^i where ε∼N(0, 1001)
#With this summary, we have determined the coefficient is statistically significant.
#This is an observation we made and confirmed from our initial observations.
#Let's put a regression line over our model to visualize this:
plot(data$Sell.Quantity)
abline(fit.sales, col=4)
#To take a better look at possible seasonality, let's call
#on ggplots function, ggseasonplot:
ggseasonplot(as.ts)
#We can see there is indeed some seasonality, with sales increasing in
#the late spring and again leading up to the holidays.
#Using other functions to make observations.
#ggsubseriesplot takes each season and plots it as a mini time series:
ggsubseriesplot(as.ts)
#This subseries plot verifies our previous observation with means (the blue lines)
#being highest in May-Jun and Dec.
#gglagplot plots time series against lagged versions of themselves:
gglagplot(as.ts)
#This helps us visualize dependence on variables with each series.
#Let's see what the autocorrelation function says about our sales:
Acf(as.ts)
#This produces a correlogram, plotting autocorrelation vs time lag.
#The autocorrelation at Lag 1 is 0.9 and Lag 12 is again 0.9.
#It's worth noting that the y scale is -1 to 1 because it is the
#correlation coefficient we're looking at here. The x axis is
#our intervals in years.
#The horizontal blue lines are the approx 95% confidence intervals.
#We can see the lags do have significant effect, meaning each
#year is dependent on previous values.
#let's play with a smaller window of time:
new.as.ts = window(as.ts, start=c(2009,3), end=c(2019,1))
new.as.ts
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 2009 8688 9162 9369 10167 9507 8923 9272 9075 8949 10843
## 2010 6558 7481 9475 9424 9351 10552 9077 9273 9420 9413 9866 11455
## 2011 6901 8014 9832 9281 9967 11344 9106 10469 10085 9612 10328 11483
## 2012 7486 8641 9709 9423 11342 11274 9845 11163 9532 10754 10953 11922
## 2013 8395 8888 10110 10493 12218 11385 11186 11462 10494 11540 11138 12709
## 2014 8557 9059 10055 10977 11792 11904 10965 10981 10828 11817 10470 13310
## 2015 8400 9062 10722 11107 11508 12904 11869 11224 12022 11983 11506 14183
## 2016 8648 10321 12107 11420 12238 13681 10950 12700 12272 11905 13016 14421
## 2017 9043 10452 12481 11491 13545 14730 11416 13402 11907 12711 13261 14265
## 2018 9564 10415 12683 11919 14138 14583 12640 14257 12396 13914 14174 15504
## 2019 10718
#This time series is plotting the sale quantity from the time I was
#of legal drinking age until the last data point.
ggseasonplot(new.as.ts)
#The seasonality becomes more clear as we shrink the window.
#Setting up dates as numeric to experiment more.
#We'll just do this as a sequence of numbers titled "months":
data$months <- seq.int(nrow(data))
boxplot(new.as.ts)
cor(data$months, data$Sell.Quantity)
## [1] 0.9393713
#We can see there is a high correlation between months passed
#and sales quantity. Let's do some more linear regression work
#to include checking coefficients and residuals:
reg <- lm(data = data, data$Sell.Quantity ~ data$months)
print(reg)
##
## Call:
## lm(formula = data$Sell.Quantity ~ data$months, data = data)
##
## Coefficients:
## (Intercept) data$months
## 3137.48 29.13
regsummary <- summary(reg)
regsummary$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3137.48194 111.2914583 28.19158 4.447437e-89
## data$months 29.13447 0.5917499 49.23443 3.418065e-152
regsummary$residuals
## 1 2 3 4 5
## 292.3835960 262.2491294 777.1146628 1309.9801961 937.8457295
## 6 7 8 9 10
## 1216.7112628 1124.5767962 766.4423295 726.3078629 830.1733963
## 11 12 13 14 15
## 782.0389296 1448.9044630 -485.2300037 -284.3644703 585.5010630
## 16 17 18 19 20
## 773.3665964 674.2321298 1034.0976631 766.9631965 736.8287298
## 21 22 23 24 25
## 614.6942632 457.5597965 692.4253299 1137.2908633 -790.8436034
## 26 27 28 29 30
## -517.9780700 518.8874633 307.7529967 477.6185301 973.4840634
## 31 32 33 34 35
## 283.3495968 649.2151301 275.0806635 119.9461968 626.8117302
## 36 37 38 39 40
## 784.6772636 -845.4572031 -760.5916697 -4.7261364 -308.8606030
## 41 42 43 44 45
## 383.0049303 612.8704637 -167.2640029 580.6015304 -213.5329362
## 46 47 48 49 50
## 76.3325971 344.1981305 290.0636639 -866.0708028 -611.2052694
## 51 52 53 54 55
## -361.3397361 -33.4742027 537.3913306 125.2568640 201.1223974
## 56 57 58 59 60
## 292.9879307 -433.1465359 184.7189974 -6.4154692 211.4500641
## 61 62 63 64 65
## -1156.6844025 -1118.8188691 -518.9533358 -367.0878024 178.7777309
## 66 67 68 69 70
## -3.3567357 141.5087977 -84.6256690 -177.7601356 165.1053977
## 71 72 73 74 75
## -375.0290689 729.8364644 -1468.2980022 -1274.4324688 -424.5669355
## 76 77 78 79 80
## -261.7014021 -143.8358688 37.0296646 -4.1048021 -361.2392687
## 81 82 83 84 85
## 17.6262647 56.4917980 -209.6426686 701.2228647 -1581.9116019
## 86 87 88 89 90
## -1208.0460686 -193.1805352 -218.3150018 -143.4494685 416.4160649
## 91 92 93 94 95
## -167.7184018 71.1471316 -18.9873350 -27.1218017 274.7437317
## 96 97 98 99 100
## 836.6092650 -1720.5252016 -1040.6596683 -13.7941349 -697.9286015
## 101 102 103 104 105
## 354.9369318 563.8024652 -502.3320015 462.5335319 -309.6009348
## 106 107 108 109 110
## 96.2645986 265.1301320 393.9956653 -1231.1388013 -1126.2732680
## 111 112 113 114 115
## -478.4077346 -506.5422012 369.3233321 208.1888655 -113.9456012
## 116 117 118 119 120
## 322.9199322 -971.2145345 -30.3490011 184.5165323 546.3820656
## 121 122 123 124 125
## -1545.7524010 -1249.8868677 -384.0213343 -225.1558010 436.7097324
## 126 127 128 129 130
## -47.4247342 120.4407991 203.3063325 -747.8281342 -0.9626008
## 131 132 133 134 135
## -238.0970674 991.7684659 -1686.3660007 -1432.5004674 -656.6349340
## 136 137 138 139 140
## -358.7694007 15.0961327 -25.0383339 380.8271994 49.6927328
## 141 142 143 144 145
## -611.4417339 351.4237995 -460.7106672 1207.1548662 -1732.9796004
## 146 147 148 149 150
## -1493.1140671 -375.2485337 -355.3830004 -145.5174670 410.3480663
## 151 152 153 154 155
## -247.7864003 -169.9208669 -336.0553336 -356.1898002 77.6757331
## 156 157 158 159 160
## 1375.5412665 -2154.5932001 -1503.7276668 -46.8621334 -536.9966001
## 161 162 163 164 165
## 412.8689333 899.7344666 -534.4000000 580.4655334 -203.6689333
## 166 167 168 169 170
## -263.8033999 244.0621334 869.9276668 -1995.2067999 -1500.3412665
## 171 172 173 174 175
## -196.4757331 -813.6101998 665.2553336 1120.1208669 -444.0135997
## 176 177 178 179 180
## 890.8519337 -257.2825330 316.5830004 775.4485337 1163.3140671
## 181 182 183 184 185
## -1783.8203996 -1696.9548662 -274.0893328 -670.2237995 1042.6417339
## 186 187 188 189 190
## 927.5072672 22.3728006 928.2383339 -520.8961327 975.9694007
## 191 192 193 194 195
## 687.8349340 1333.7004674 -1667.4339993 -1306.5684659 -453.7029326
## 196 197 198 199 200
## 47.1626008 917.0281342 1070.8936675 617.7592009 410.6247342
## 201 202 203 204 205
## 231.4902676 925.3558010 -293.7786657 1758.0868677 -1844.0475990
## 206 207 208 209 210
## -1561.1820656 -480.3165323 -35.4509989 142.4145345 911.2800678
## 211 212 213 214 215
## 222.1456012 -390.9888655 -71.1233321 -297.2577988 -452.3922654
## 216 217 218 219 220
## 1412.4732680 -2901.6611987 -2007.7956653 -42.9301320 -123.0645986
## 221 222 223 224 225
## -225.1990652 946.6664681 -557.4679985 -390.6024652 -272.7369318
## 226 227 228 229 230
## -308.8713985 114.9941349 1674.8596683 -2908.2747984 -1824.4092650
## 231 232 233 234 235
## -35.5437317 -615.6781983 41.1873350 1389.0528684 -878.0815982
## 236 237 238 239 240
## 455.7839351 42.6494685 -459.4849982 227.3805352 1353.2460686
## 241 242 243 244 245
## -2672.8883981 -1547.0228647 -508.1573314 -823.2917980 1066.5737353
## 246 247 248 249 250
## 969.4392687 -488.6951979 800.1703354 -859.9641312 332.9014021
## 251 252 253 254 255
## 502.7669355 1442.6324688 -2113.5019978 -1649.6364644 -456.7709311
## 256 257 258 259 260
## -102.9053977 1592.9601356 730.8256690 502.6912023 749.5567357
## 261 262 263 264 265
## -247.5777309 769.2878024 338.1533358 1880.0188691 -2301.1155975
## 266 267 268 269 270
## -1828.2500641 -861.3845308 31.4810026 817.3465359 900.2120693
## 271 272 273 274 275
## -67.9223974 -81.0568640 -263.1913306 696.6742027 -679.4602639
## 276 277 278 279 280
## 2131.4052694 -2807.7291972 -2174.8636639 -543.9981305 -188.1325971
## 281 282 283 284 285
## 183.7329362 1550.5984696 486.4640029 -187.6704637 581.1950697
## 286 287 288 289 290
## 513.0606030 6.9261364 2654.7916697 -2909.3427969 -1265.4772636
## 291 292 293 294 295
## 491.3882698 -224.7461968 564.1193365 1977.9848699 -782.1495968
## 296 297 298 299 300
## 938.7159366 481.5814699 85.4470033 1167.3125367 2543.1780700
## 301 302 303 304 305
## -2863.9563966 -1484.0908633 515.7746701 -503.3597965 1521.5057368
## 306 307 308 309 310
## 2677.3712702 -665.7631965 1291.1023369 -233.0321298 541.8334036
## 311 312 313 314 315
## 1062.6989370 2037.5644703 -2692.5699963 -1870.7044630 368.1610704
## 316 317 318 319 320
## -424.9733963 1764.8921371 2180.7576705 208.6232038 1796.4887372
## 321 322 323 324 325
## -93.6457295 1395.2198039 1626.0853372 2926.9508706 -1888.1835960
#A residual is an estimate of the statistical error.
#Let's apply an application of linear regression:
mldata <- data
split <- sample.split(mldata, SplitRatio = 0.8)
train <- subset(mldata,split==T)
test <- subset(mldata,split==F)
mlreg <- lm(data=train, Sell.Quantity ~ months)
summary(mlreg)
##
## Call:
## lm(formula = Sell.Quantity ~ months, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2560.39 -535.80 43.83 632.71 2479.15
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3146.6599 125.3621 25.1 <2e-16 ***
## months 30.4882 0.6671 45.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 917 on 214 degrees of freedom
## Multiple R-squared: 0.9071, Adjusted R-squared: 0.9066
## F-statistic: 2089 on 1 and 214 DF, p-value: < 2.2e-16
#Creating a predicted sales model with our model:
pred.sales <- predict(mlreg, test)
pred.sales
## 1 4 7 10 13 16 19 22
## 3177.148 3268.613 3360.077 3451.542 3543.007 3634.471 3725.936 3817.401
## 25 28 31 34 37 40 43 46
## 3908.866 4000.330 4091.795 4183.260 4274.724 4366.189 4457.654 4549.118
## 49 52 55 58 61 64 67 70
## 4640.583 4732.048 4823.512 4914.977 5006.442 5097.906 5189.371 5280.836
## 73 76 79 82 85 88 91 94
## 5372.300 5463.765 5555.230 5646.694 5738.159 5829.624 5921.088 6012.553
## 97 100 103 106 109 112 115 118
## 6104.018 6195.482 6286.947 6378.412 6469.877 6561.341 6652.806 6744.271
## 121 124 127 130 133 136 139 142
## 6835.735 6927.200 7018.665 7110.129 7201.594 7293.059 7384.523 7475.988
## 145 148 151 154 157 160 163 166
## 7567.453 7658.917 7750.382 7841.847 7933.311 8024.776 8116.241 8207.705
## 169 172 175 178 181 184 187 190
## 8299.170 8390.635 8482.099 8573.564 8665.029 8756.493 8847.958 8939.423
## 193 196 199 202 205 208 211 214
## 9030.888 9122.352 9213.817 9305.282 9396.746 9488.211 9579.676 9671.140
## 217 220 223 226 229 232 235 238
## 9762.605 9854.070 9945.534 10036.999 10128.464 10219.928 10311.393 10402.858
## 241 244 247 250 253 256 259 262
## 10494.322 10585.787 10677.252 10768.716 10860.181 10951.646 11043.110 11134.575
## 265 268 271 274 277 280 283 286
## 11226.040 11317.504 11408.969 11500.434 11591.899 11683.363 11774.828 11866.293
## 289 292 295 298 301 304 307 310
## 11957.757 12049.222 12140.687 12232.151 12323.616 12415.081 12506.545 12598.010
## 313 316 319 322 325
## 12689.475 12780.939 12872.404 12963.869 13055.333
#Calculate prediction accuracy and Error Rates
actuals_preds <- data.frame(cbind(actuals=test$Sell.Quantity, predicteds=pred.sales))
correlation_accuracy <- cor(actuals_preds)
correlation_accuracy
## actuals predicteds
## actuals 1.0000000 0.9342392
## predicteds 0.9342392 1.0000000
#Min-Max accuracy and MAPE (mean absolute percentage error) tests:
min_max_accuracy <- mean(apply(actuals_preds, 1, min) / apply(actuals_preds, 1, max))
mape <- mean(abs((actuals_preds$predicteds - actuals_preds$actuals)) / actuals_preds$actuals)
#And checking the model performance stats:
ModelPerformance <- data.frame(R2 = R2(pred.sales, test$Sell.Quantity),
RMSE = RMSE(pred.sales, test$Sell.Quantity),
MAE = MAE(pred.sales, test$Sell.Quantity),
MinMaxAccuracy = min_max_accuracy, mape=mape)
ModelPerformance
## R2 RMSE MAE MinMaxAccuracy mape
## 1 0.8728028 1234.191 877.8535 0.8911116 0.1334418
#This displays our R squared, root mean squared, mean absolute, and mean absolute
#percentage errors as well as our minmax accuracy.
#Recording the results for comparison:
#R2 is .927, RMSE is 1149.16, MAE is 841.61, MinMax is .90, and mape is .096.
#Let's go ahead and do a k-fold cross-validation to estimate the accuracy of this model.
set.seed(123)
train.control <- trainControl(method = "cv", number = 10)
#Train the model for k-fold cross validation:
cv.reg <- train(Sell.Quantity ~ months, data = mldata, method = "lm",
trControl = train.control)
#Summarize the results
print(cv.reg)
## Linear Regression
##
## 325 samples
## 1 predictor
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 292, 292, 292, 293, 293, 293, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 989.5989 0.8880777 749.5516
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
summary(cv.reg)
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2909.34 -459.48 22.37 614.69 2926.95
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3137.4819 111.2915 28.19 <2e-16 ***
## months 29.1345 0.5917 49.23 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1001 on 323 degrees of freedom
## Multiple R-squared: 0.8824, Adjusted R-squared: 0.8821
## F-statistic: 2424 on 1 and 323 DF, p-value: < 2.2e-16
#We can see some of the same numbers here, such as intercept and
#coefficient on time (29).
#Testing the cross validation model
pred.sales.2 <- predict(cv.reg,test)
pred.sales.2
## 1 4 7 10 13 16 19 22
## 3166.616 3254.020 3341.423 3428.827 3516.230 3603.633 3691.037 3778.440
## 25 28 31 34 37 40 43 46
## 3865.844 3953.247 4040.650 4128.054 4215.457 4302.861 4390.264 4477.667
## 49 52 55 58 61 64 67 70
## 4565.071 4652.474 4739.878 4827.281 4914.684 5002.088 5089.491 5176.895
## 73 76 79 82 85 88 91 94
## 5264.298 5351.701 5439.105 5526.508 5613.912 5701.315 5788.718 5876.122
## 97 100 103 106 109 112 115 118
## 5963.525 6050.929 6138.332 6225.735 6313.139 6400.542 6487.946 6575.349
## 121 124 127 130 133 136 139 142
## 6662.752 6750.156 6837.559 6924.963 7012.366 7099.769 7187.173 7274.576
## 145 148 151 154 157 160 163 166
## 7361.980 7449.383 7536.786 7624.190 7711.593 7798.997 7886.400 7973.803
## 169 172 175 178 181 184 187 190
## 8061.207 8148.610 8236.014 8323.417 8410.820 8498.224 8585.627 8673.031
## 193 196 199 202 205 208 211 214
## 8760.434 8847.837 8935.241 9022.644 9110.048 9197.451 9284.854 9372.258
## 217 220 223 226 229 232 235 238
## 9459.661 9547.065 9634.468 9721.871 9809.275 9896.678 9984.082 10071.485
## 241 244 247 250 253 256 259 262
## 10158.888 10246.292 10333.695 10421.099 10508.502 10595.905 10683.309 10770.712
## 265 268 271 274 277 280 283 286
## 10858.116 10945.519 11032.922 11120.326 11207.729 11295.133 11382.536 11469.939
## 289 292 295 298 301 304 307 310
## 11557.343 11644.746 11732.150 11819.553 11906.956 11994.360 12081.763 12169.167
## 313 316 319 322 325
## 12256.570 12343.973 12431.377 12518.780 12606.184
#Now to check the performance of model 2:
ModelPerformance.2 <- data.frame(R2 = R2(pred.sales.2, test$Sell.Quantity),
RMSE = RMSE(pred.sales.2, test$Sell.Quantity),
MAE = MAE(pred.sales.2, test$Sell.Quantity),
MinMaxAccuracy = min_max_accuracy, mape=mape)
ModelPerformance.2
## R2 RMSE MAE MinMaxAccuracy mape
## 1 0.8728028 1086.815 762.4609 0.8911116 0.1334418
#The R2 value is .927, RMSE is 982.88, MAE is 742.08, MinMax is .90, and mape is .096.
#Model 2 having a lower RMSE and MAE indicates a more accurate prediction for sales quantity
#since these are calculated based on the average magnitude of errors.
#Based on seasonality, trends, and my experimentation with some linear regression models,
#I believe the sales data could be forecast with relative accuracy.