In this project, I use several R packages to explore the sales data and build models that can predict future sales. The analysis presents the ideas, the path that led to them, and the results, including the accuracy of the model.
Load the required packages:
library(ggplot2)
library(forecast)
library(tseries)
library(tidyverse)
library("rio")
library(readxl)
library(lubridate)
library(dplyr)
library(forcats)
library(kableExtra)
library(knitr) # loaded explicitly for kable(); kableExtra builds on it
sales.dat <- read.csv("training.csv")
Look at the first 10 rows of the data in the sales dataframe
df <- head(sales.dat, 10)
kable(df) %>%
kable_styling(bootstrap_options = "striped", font_size = 12, full_width = F)
| order_id | contact_id | purchase_date | product_id | quantity | sales_amount |
|---|---|---|---|---|---|
| 4852169 | 17846335 | 2012-01-01 | 3308032 | 1 | 18.71 |
| 3570597 | 18759585 | 2012-01-01 | 37220513 | 2 | 83.50 |
| 32096674 | 5025194 | 2012-01-01 | 10259603 | 1 | 19.95 |
| 2904402 | 29624779 | 2012-01-01 | 3308032 | 1 | 18.71 |
| 38369470 | 42287117 | 2012-01-01 | 3308032 | 1 | 18.71 |
| 10514626 | 34476502 | 2012-01-01 | 21913642 | 2 | 39.90 |
| 36513528 | 5025194 | 2012-01-01 | 30518406 | 1 | 45.56 |
| 6595294 | 14603912 | 2012-01-01 | 16308207 | 4 | 159.80 |
| 18187529 | 10412066 | 2012-01-01 | 11543327 | 1 | 239.95 |
| 37889477 | 35117040 | 2012-01-01 | 21103908 | 1 | 37.95 |
Convert the purchase dates to Date format and add columns for year, month, day, and weekday.
## Convert the purchase dates to Date format
sales.dat$purchase_date <- ymd(sales.dat$purchase_date)
sales.dat$Date <- sales.dat$purchase_date
sales.dat$year <- as.factor(year(sales.dat$purchase_date))
sales.dat$month <- as.factor(month(sales.dat$purchase_date))
sales.dat$day <- as.factor(day(sales.dat$purchase_date))
sales.dat$weekday <- as.factor(weekdays(sales.dat$purchase_date))
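To confirm the new columns were created as intended, a quick structural check (a minimal sketch; glimpse() comes from dplyr, which is already loaded):
glimpse(sales.dat) # verify the Date, year, month, day, and weekday columns exist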
Remove zero-amount sales from the data
# Remove zero sales in the data
sales.dat <- sales.dat[sales.dat$sales_amount > 0, ]
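As a quick sanity check that the filter worked (a minimal sketch in base R):
stopifnot(all(sales.dat$sales_amount > 0)) # fails loudly if any non-positive amounts remain
summary(sales.dat$sales_amount) # quick look at the remaining distribution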
HIGHS, LOWS, BEST AND WORST SALES
Overall highest sales
top.sales <- sales.dat %>%
slice_max(sales_amount, n=5)
kable(top.sales) %>%
kable_styling(bootstrap_options = "striped", font_size = 12, full_width = F)
| order_id | contact_id | purchase_date | product_id | quantity | sales_amount | Date | year | month | day | weekday |
|---|---|---|---|---|---|---|---|---|---|---|
| 10199301 | 8166406 | 2012-02-17 | 23237796 | 50 | 10997.50 | 2012-02-17 | 2012 | 2 | 17 | Friday |
| 16120223 | 36576499 | 2012-12-05 | 18800419 | 50 | 4997.50 | 2012-12-05 | 2012 | 12 | 5 | Wednesday |
| 28260999 | 36576499 | 2013-03-12 | 18800419 | 50 | 4997.50 | 2013-03-12 | 2013 | 3 | 12 | Tuesday |
| 25015419 | 10558171 | 2012-02-27 | 32836058 | 1 | 4482.23 | 2012-02-27 | 2012 | 2 | 27 | Monday |
| 31348848 | 27348661 | 2013-01-18 | 7796190 | 2 | 4223.92 | 2013-01-18 | 2013 | 1 | 18 | Friday |
Highest Sales for 2012
top.2012 <- sales.dat %>% filter(year == 2012) %>%
slice_max(sales_amount, n=5)
kable(top.2012) %>%
kable_styling(bootstrap_options = "striped", font_size = 12, full_width = F)
| order_id | contact_id | purchase_date | product_id | quantity | sales_amount | Date | year | month | day | weekday |
|---|---|---|---|---|---|---|---|---|---|---|
| 10199301 | 8166406 | 2012-02-17 | 23237796 | 50 | 10997.50 | 2012-02-17 | 2012 | 2 | 17 | Friday |
| 16120223 | 36576499 | 2012-12-05 | 18800419 | 50 | 4997.50 | 2012-12-05 | 2012 | 12 | 5 | Wednesday |
| 25015419 | 10558171 | 2012-02-27 | 32836058 | 1 | 4482.23 | 2012-02-27 | 2012 | 2 | 27 | Monday |
| 20041945 | 29412637 | 2012-05-15 | 27441828 | 260 | 4147.00 | 2012-05-15 | 2012 | 5 | 15 | Tuesday |
| 46251672 | 122106 | 2012-05-17 | 39469603 | 1 | 3450.62 | 2012-05-17 | 2012 | 5 | 17 | Thursday |
Highest Sales for 2013
top.2013 <- sales.dat %>% filter(year == 2013) %>%
slice_max(sales_amount, n=5)
kable(top.2013) %>%
kable_styling(bootstrap_options = "striped", font_size = 12, full_width = F)
| order_id | contact_id | purchase_date | product_id | quantity | sales_amount | Date | year | month | day | weekday |
|---|---|---|---|---|---|---|---|---|---|---|
| 28260999 | 36576499 | 2013-03-12 | 18800419 | 50 | 4997.50 | 2013-03-12 | 2013 | 3 | 12 | Tuesday |
| 31348848 | 27348661 | 2013-01-18 | 7796190 | 2 | 4223.92 | 2013-01-18 | 2013 | 1 | 18 | Friday |
| 7857999 | 36576499 | 2013-01-25 | 18800419 | 40 | 3998.00 | 2013-01-25 | 2013 | 1 | 25 | Friday |
| 9099385 | 16408693 | 2013-03-26 | 2034783 | 1 | 3503.03 | 2013-03-26 | 2013 | 3 | 26 | Tuesday |
| 42213345 | 19201239 | 2013-03-30 | 2034783 | 1 | 3483.21 | 2013-03-30 | 2013 | 3 | 30 | Saturday |
| 5363792 | 22945018 | 2013-04-11 | 2034783 | 1 | 3483.21 | 2013-04-11 | 2013 | 4 | 11 | Thursday |
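Note that the 2013 table shows six rows even though n = 5: slice_max() keeps ties by default, and two orders share the 3483.21 amount. If exactly five rows are wanted, ties can be dropped (a minimal sketch using slice_max's with_ties argument):
top.2013 <- sales.dat %>%
  filter(year == 2013) %>%
  slice_max(sales_amount, n = 5, with_ties = FALSE)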
Best-performing months of 2012 and 2013
best.month.2012 <- sales.dat %>%
filter(year == 2012) %>%
group_by(month) %>%
summarize(Total_sales = sum(sales_amount)) %>%
arrange(desc(Total_sales))
kable(best.month.2012) %>%
kable_styling(bootstrap_options = "striped", font_size = 12, full_width = F)
| month | Total_sales |
|---|---|
| 11 | 2242950 |
| 12 | 2172004 |
| 1 | 1767764 |
| 6 | 1660848 |
| 7 | 1655123 |
| 5 | 1639885 |
| 3 | 1620713 |
| 8 | 1590792 |
| 10 | 1587317 |
| 4 | 1515504 |
| 9 | 1500107 |
| 2 | 1491336 |
best.month.2013 <- sales.dat%>%
filter(year == 2013) %>%
group_by(month) %>%
summarize(Total_sales = sum(sales_amount)) %>%
arrange(desc(Total_sales))
kable(best.month.2013) %>%
kable_styling(bootstrap_options = "striped", font_size = 12, full_width = F)
| month | Total_sales |
|---|---|
| 12 | 1872236.2 |
| 11 | 1647369.4 |
| 1 | 1383460.4 |
| 10 | 1372081.1 |
| 6 | 1249496.1 |
| 4 | 1240470.6 |
| 5 | 1236225.7 |
| 9 | 1202684.2 |
| 8 | 1195198.4 |
| 7 | 1184992.2 |
| 3 | 1093032.6 |
| 2 | 971850.8 |
Total yearly revenue in 2012 and 2013
rev.2012 <- sales.dat%>%
filter(year == 2012) %>%
summarize(total = sum(sales_amount))
kable(rev.2012) %>%
kable_styling(bootstrap_options = "striped", font_size = 12, full_width = F)
| total |
|---|
| 20444342 |
rev.2013 <- sales.dat%>%
filter(year == 2013) %>%
summarize(total = sum(sales_amount))
kable(rev.2013) %>%
kable_styling(bootstrap_options = "striped", font_size = 12, full_width = F)
| total |
|---|
| 15649098 |
Percent change in revenue: 2013 revenue as a share of 2012 revenue (i.e., sales dropped by roughly 23.5%)
rev.change <- ((rev.2013 / rev.2012) * 100)
kable(rev.change) %>%
kable_styling(bootstrap_options = "striped", font_size = 12, full_width = F)
| total |
|---|
| 76.54488 |
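The ratio above shows 2013 revenue at roughly 76.5% of 2012's; the decline itself can be computed directly (a minimal sketch using the totals computed above):
rev.drop <- (1 - rev.2013$total / rev.2012$total) * 100 # ~23.5% year-over-year decline
rev.drop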
Total number of products
prods <- sales.dat %>%
select(product_id) %>%
distinct() %>%
summarise("No of Products" = n()) #there are 16471 product
kable(prods) %>%
kable_styling(bootstrap_options = "striped", font_size = 12, full_width = F)
| No of Products |
|---|
| 16471 |
Most popular products in the last two years: products with the maximum sales in terms of quantity
popular_products <- sales.dat %>%
group_by(product_id) %>%
summarize(Prd.Cnt = sum(quantity)) %>%
ungroup() %>%
arrange(desc(Prd.Cnt))
options(scipen = 999)
# Plot top 20 popular products in 2012 and 2013 by sales
head(popular_products, 20) %>%
ggplot(aes(
x = reorder(as.factor(product_id), Prd.Cnt),
y = Prd.Cnt, fill = as.factor(product_id)
)) +
geom_bar(stat = "identity") +
theme(legend.position = "none") +
labs(y = "Total Sales", x = "Product ID", title = "Top 10 Highest Selling Products (2012-2013)") +
coord_flip()
Visualize the daily sales for each year. (Note: par(mfrow = ...) only affects base graphics, so the two ggplot charts below print separately.)
sales.dat %>%
filter(year == 2012) %>%
ggplot(., aes(x = purchase_date, y = sales_amount)) +
geom_col()+
ggtitle("Sales Per week of 2012")
sales.dat %>%
filter(year == 2013) %>%
ggplot(., aes(x = purchase_date, y = sales_amount)) +
geom_col()+
ggtitle("Sales Per week of 2013")
Monthly Sales 2012
sales.dat %>%
filter(year == 2012) %>%
ggplot(., aes(x = as.factor(month),
y = sales_amount)) +
geom_col() +
ggtitle("Monthly Sales 2012") +
coord_flip()
Monthly Sales 2013
sales.dat %>%
filter(year == 2013) %>%
ggplot(., aes(x = as.factor(month),
y = sales_amount)) +
geom_col() +
ggtitle("Monthly Sales 2013") +
coord_flip()
Product Popularity
Most popular products over the two years: products with the maximum sales in terms of quantity. (This reuses the popular_products table computed above.)
Plot of the top 20 most popular products over the two years by sales
head(popular_products, 20) %>%
ggplot(aes(
x = reorder(as.factor(product_id), Prd.Cnt),
y = Prd.Cnt, fill = as.factor(product_id)
)) +
geom_bar(stat = "identity") +
theme(legend.position = "none") +
labs(y = "Total Sales", x = "Products", title = "Highest Selling Products") +
coord_flip()
Total number of contacts (customers)
sales.dat %>%
select(contact_id) %>%
distinct() %>%
summarise("No of Contacts" = n())
## No of Contacts
## 1 262294
## There were 262,294 total unique customers in the two-year period
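The same count can be computed in one step (a sketch; n_distinct() is a dplyr helper):
n_distinct(sales.dat$contact_id) # number of unique customers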
Most popular products (by product count) per customer between 2012 and 2013
pop.prod.per.cont <- sales.dat %>%
group_by(contact_id, product_id) %>%
summarize(Qty = sum(quantity)) %>%
filter(Qty == max(Qty)) %>%
arrange(desc(Qty)) %>%
ungroup()
ggplot(
data = head(pop.prod.per.cont, 20),
aes(x = reorder(as.factor(contact_id), Qty), y = Qty, fill = as.factor(product_id))
) +
geom_bar(stat = "identity") +
coord_flip() +
labs(title = "Most Selling Product per Contact", x = "Contact ID", y = "Qty Sold", fill = "Product ID")
Most popular products in 2012 and 2013, respectively: products with the maximum sales in terms of quantity
pop.prods.2012 <- sales.dat %>%
filter(year == 2012)%>%
group_by(product_id) %>%
summarize(Prd.Cnt = sum(quantity)) %>%
ungroup() %>%
arrange(desc(Prd.Cnt))
options(scipen = 999)
Plot of the top 20 most popular products in 2012 by sales
head(pop.prods.2012, 20) %>%
ggplot(aes(
x = reorder(as.factor(product_id), Prd.Cnt),
y = Prd.Cnt, fill = as.factor(product_id)
)) +
geom_bar(stat = "identity") +
theme(legend.position = "none") +
labs(y = "Total Sales", x = "Product ID", title = "Highest Selling Products in 2012") +
coord_flip()
Plot of the top 20 most popular products in 2013 by sales
pop.prods.2013 <- sales.dat %>%
filter(year == 2013)%>%
group_by(product_id) %>%
summarize(Prd.Cnt = sum(quantity)) %>%
ungroup() %>%
arrange(desc(Prd.Cnt))
options(scipen = 999)
head(pop.prods.2013, 20) %>%
ggplot(aes(
x = reorder(as.factor(product_id), Prd.Cnt),
y = Prd.Cnt, fill = as.factor(product_id)
)) +
geom_bar(stat = "identity") +
theme(legend.position = "none") +
labs(y = "Total Sales", x = "Product ID", title = "Highest Selling Products in 2013") +
coord_flip()
Stacked yearly sales
sales.dat %>%
ggplot(., aes(x = month,
y = sales_amount,
fill = as.factor(year))) +
geom_col() +
ggtitle("2012 to 2013 Monthly Sales")
CLEANING THE DATA FOR MODELING AND PREDICTION
Data Cleaning and Exploratory Data Analysis
Using a sample of the data (a fraction of the original) for modeling and easier visualization.
# Sample 1,000 data points; set a seed for reproducibility, and order by date so
# the downstream time-series operations see a chronological series
set.seed(42)
small_data <- sample_n(sales.dat, 1000) %>% arrange(Date)
Plot of the sales field over time to show variance in this field
#Plot the sales field over time - show variance in this field
ggplot(small_data, aes(Date, sales_amount)) +
geom_line() +
scale_x_date("month") +
ylab("Monthly Sales") +
xlab("")
Plot the sales month by month to see the range and outliers
ggplot(small_data, aes(Date, sales_amount)) +
geom_point(color = "navyblue") +
facet_wrap( ~ month) +
scale_x_date("month") +
ylab("Monthly Sales") +
xlab("")
# Create a time series object based on sales and pass on tsclean
small.sales.ts <- ts(small_data[, c("sales_amount")])
small_data$clean_count <- tsclean(small.sales.ts)
# tsclean() identifies and replaces outliers, and fills in any missing values
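To see how much tsclean() actually changed, the cleaned series can be compared to the raw amounts (a minimal sketch):
sum(small_data$clean_count != small_data$sales_amount) # number of points tsclean() adjusted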
Plot of the cleaned data
# Plot the cleaned data
ggplot() +
geom_line(data = small_data, aes(x = Date, y = clean_count)) + ylab("Cleaned Count")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
Monthly and Weekly Moving averages
# Compute the weekly and monthly moving averages (MA) and compare them to the cleaned daily data, which still has a lot of variance
small_data$cnt_ma <- ma(small_data$clean_count, order = 7) # weekly MA on the cleaned, outlier-free series
small_data$cnt_ma30 <- ma(small_data$clean_count, order = 30) # monthly MA
ggplot() +
geom_line(data = small_data, aes(x = Date,y = clean_count, color = "Counts")) +
geom_line(data = small_data, aes(x = Date, y = cnt_ma, color = "Weekly Moving Average")) +
geom_line(data = small_data, aes(x = Date, y = cnt_ma30, color = "Monthly Moving Average")) +
ylab("Sales")
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
Decomposing the time series data
# Decomposing the time series data - taking seasonality, trend and cycle into account
#calculate seasonal component with stl()
small_count_ma <- ts(na.omit(small_data$cnt_ma), frequency = 30)
small_decomp <- stl(small_count_ma, s.window = "periodic")
small_deseasonal_cnt <- seasadj(small_decomp) # used later in the ARIMA model
plot(small_decomp)
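The individual components can also be inspected numerically (a sketch; stl() stores them in the time.series matrix):
head(small_decomp$time.series) # columns: seasonal, trend, remainder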
Test for stationarity
# Test for stationarity - first a visual check for stationary variance
# 2nd - Augmented Dickey-Fuller (ADF) test
adf.test(small_count_ma, alternative = "stationary") # a more negative test statistic is stronger evidence of stationarity
## Warning in adf.test(small_count_ma, alternative = "stationary"): p-value smaller
## than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: small_count_ma
## Dickey-Fuller = -7.7034, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary
# the data is stationary
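As a complementary check (a sketch; kpss.test() is also in the tseries package and has the opposite null hypothesis, i.e., stationarity):
kpss.test(small_count_ma, null = "Level") # failing to reject here also supports stationarity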
Autocorrelations and choosing the model order
#ACF plots display correlation between series and its lags
Acf(small_count_ma, main='')
# PACF plots display the correlation between a series and its lags that is not explained by previous lags
Pacf(small_count_ma, main='')
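The required differencing order can also be estimated automatically (a sketch; ndiffs() is in the forecast package):
ndiffs(small_count_ma) # suggested number of differences for stationarity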
Differencing
small_count_d1 <- diff(small_deseasonal_cnt, differences = 1)
small_count_d2 <- diff(small_deseasonal_cnt, differences = 2)
# Run the ADF test at each level of differencing
small_adf0 <- adf.test(small_count_ma, alternative = "stationary") # the more negative the ADF statistic, the stronger the evidence of stationarity; here it is -7.7034 with a p-value of 0.01
## Warning in adf.test(small_count_ma, alternative = "stationary"): p-value smaller
## than printed p-value
small_adf1 <- adf.test(small_count_d1, alternative = "stationary")
## Warning in adf.test(small_count_d1, alternative = "stationary"): p-value smaller
## than printed p-value
small_adf2 <- adf.test(small_count_d2, alternative = "stationary")
## Warning in adf.test(small_count_d2, alternative = "stationary"): p-value smaller
## than printed p-value
small_adf0;small_adf1;small_adf2
##
## Augmented Dickey-Fuller Test
##
## data: small_count_ma
## Dickey-Fuller = -7.7034, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary
##
## Augmented Dickey-Fuller Test
##
## data: small_count_d1
## Dickey-Fuller = -12.717, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary
##
## Augmented Dickey-Fuller Test
##
## data: small_count_d2
## Dickey-Fuller = -19.619, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary
# Compare the ACF and PACF plots to identify the best-fitting level of differencing
Acf(small_count_ma, main='')
Acf(small_count_d1, main='') # this fits better
Pacf(small_count_ma, main='')
Pacf(small_count_d1, main='')
FITTING THE ARIMA MODEL
# Get auto fit p,d,q values
auto.arima(small_deseasonal_cnt, seasonal = FALSE)
## Series: small_deseasonal_cnt
## ARIMA(3,0,2) with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2 mean
## 0.7085 0.7737 -0.6344 0.2622 -0.5231 53.9924
## s.e. 0.0686 0.0614 0.0525 0.0775 0.0682 1.3309
##
## sigma^2 estimated as 75.23: log likelihood=-3555.54
## AIC=7125.08 AICc=7125.19 BIC=7159.39
Model Evaluation
#Evaluate and iterate the model
small_fit <- auto.arima(small_deseasonal_cnt, seasonal = FALSE)
tsdisplay(residuals(small_fit), lag.max = 45, main = 'Auto ARIMA (3,0,2) Model Residuals')
small_fit2 <- arima(small_deseasonal_cnt, order = c(3,0,9))
## Warning in arima(small_deseasonal_cnt, order = c(3, 0, 9)): possible convergence
## problem: optim gave code = 1
tsdisplay(residuals(small_fit2), lag.max = 20, main = 'ARIMA(3,0,9) Model Residuals')
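Beyond visual inspection, the residuals can be tested formally (a sketch; checkresiduals() in the forecast package plots the residual diagnostics and runs a Ljung-Box test):
checkresiduals(small_fit) # Ljung-Box test on the auto.arima fit
checkresiduals(small_fit2) # and on the manual ARIMA(3,0,9) fit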
Forecast with auto.arima
# Forecast with the auto.arima fit and the manual ARIMA(3,0,9) fit
small_fcast <- forecast(small_fit, h=30)
small_fcast2 <- forecast(small_fit2, h=30)
plot(small_fcast)
plot(small_fcast2)
Model Testing
# TEST MODEL PERFORMANCE WITH A HOLDOUT SET
hold.set <- window(ts(small_deseasonal_cnt), start = 450)
fit_no_holdset <- arima(ts(small_deseasonal_cnt[-c(350:994)]), order = c(3,0,9)) # note: the fit drops observations 350 onward, while the holdout window starts at 450; ideally these indices should match
## Warning in log(s2): NaNs produced
## Warning in arima(ts(small_deseasonal_cnt[-c(350:994)]), order = c(3, 0, :
## possible convergence problem: optim gave code = 1
fcast_no_holdset <- forecast(fit_no_holdset, h=35)
plot(fcast_no_holdset, main = "")
lines(ts(small_deseasonal_cnt))
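To quantify the holdout performance rather than eyeballing the plot (a minimal sketch, assuming the indexing above: the fit excluded observations 350 onward, so the first 35 forecasts line up with observations 350-384):
accuracy(fcast_no_holdset, small_deseasonal_cnt[350:384]) # error measures of the 35 forecasts vs. the actuals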
# the forecast needs seasonality added back: without it the prediction is too flat to be realistic
fit_w_seasonality <- auto.arima(small_deseasonal_cnt, seasonal = TRUE)
seasonality.fcast <- forecast(fit_w_seasonality, h = 90)
plot(seasonality.fcast)
lines(ts(small_count_ma)) #plots the actual values
lines(ts(small_deseasonal_cnt)) #plots the deseasonalized values
# the moving averages stay within the 95% range of the forecast, an indication that the model is good
FURTHER TESTING AND ANALYSIS OF THE MODEL
#Test against the auto arima model
small_fit3 <- auto.arima(small_deseasonal_cnt, seasonal = FALSE)
tsdisplay(residuals(small_fit3), lag.max = 15, main = "Auto ARIMA Model Residuals")
# There is evidence of a lag at 7, a higher order q value (q=7) might be necessary
small_fit4 <- arima(small_deseasonal_cnt, order = c(1,1,7))
tsdisplay(residuals(small_fit4), lag.max = 15, main = "ARIMA(1,1,7) Model Residuals")
# Trying a baseline ARIMA(1,1,1) specification
small_fit5 <- arima(small_deseasonal_cnt, order = c(1,1,1))
tsdisplay(residuals(small_fit5), lag.max = 15, main = "ARIMA(1,1,1) Model Residuals")
small_fit6 <- arima(small_deseasonal_cnt, order = c(3,0,7))
tsdisplay(residuals(small_fit6), lag.max = 20, main = "ARIMA(3,0,7) Model Residuals")
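Before comparing the forecast plots, the candidate fits can be ranked numerically (a sketch; AIC() works on these fits via their log-likelihoods, and lower is better):
sapply(list(auto = small_fit, fit2 = small_fit2, fit4 = small_fit4, fit5 = small_fit5, fit6 = small_fit6), AIC)
# note: AICs are only directly comparable between models with the same differencing order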
#plot the forecasts and select the best model
small_fcast <- forecast(small_fit, h=30)
small_fcast
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 34.13333 47.48162 36.36596 58.59728 30.48169 64.48155
## 34.16667 48.31492 32.82290 63.80693 24.62192 72.00791
## 34.20000 48.60771 29.93075 67.28466 20.04377 77.17165
## 34.23333 49.91492 29.31586 70.51399 18.41137 81.41848
## 34.26667 50.53902 28.65727 72.42077 17.07377 84.00426
## 34.30000 51.80689 29.28427 74.32950 17.36152 86.25226
## 34.33333 52.35877 29.48740 75.23014 17.38003 87.33751
## 34.36667 53.33484 30.35174 76.31794 18.18522 88.48446
## 34.40000 53.64906 30.62935 76.66878 18.44344 88.85469
## 34.43333 54.27679 31.25660 77.29697 19.07045 89.48312
## 34.46667 54.34543 31.32291 77.36795 19.13553 89.55534
## 34.50000 54.68040 31.63861 77.72220 19.44102 89.91979
## 34.53333 54.57261 31.50850 77.63672 19.29910 89.84613
## 34.56667 54.71186 31.61445 77.80927 19.38743 90.03630
## 34.60000 54.51461 31.39264 77.63658 19.15261 89.87661
## 34.63333 54.55098 31.40424 77.69772 19.15109 89.95087
## 34.66667 54.33578 31.17534 77.49622 18.91495 89.75662
## 34.70000 54.33659 31.16506 77.50811 18.89880 89.77438
## 34.73333 54.14758 30.97179 77.32338 18.70326 89.59190
## 34.76667 54.15081 30.97215 77.32947 18.70211 89.59952
## 34.80000 54.00636 30.82723 77.18549 18.55694 89.45578
## 34.83333 54.02641 30.84704 77.20577 18.57663 89.47619
## 34.86667 53.92680 30.74739 77.10620 18.47696 89.37664
## 34.90000 53.96338 30.78394 77.14283 18.51348 89.41329
## 34.93333 53.89951 30.71970 77.07932 18.44905 89.34997
## 34.96667 53.94576 30.76574 77.12578 18.49497 89.39654
## 35.00000 53.90590 30.72543 77.08636 18.45444 89.35735
## 35.03333 53.95396 30.77330 77.13461 18.50220 89.40571
## 35.06667 53.92783 30.74688 77.10877 18.47563 89.38002
## 35.10000 53.97179 30.79075 77.15282 18.51945 89.42412
plot(small_fcast)
small_fcast2 <- forecast(small_fit2, h=30)
plot(small_fcast2)
small_fcast3 <- forecast(small_fit3, h=30)
plot(small_fcast3)
small_fcast4 <- forecast(small_fit4, h=30)
plot(small_fcast4)
small_fcast5 <- forecast(small_fit5, h=30)
plot(small_fcast5)
small_fcast6 <- forecast(small_fit6, h=30)
plot(small_fcast6)
# Predict 90 periods (roughly one quarter of daily data) ahead
small_fcast <- forecast(small_fit, h=90)
plot(small_fcast)
small_fcast2 <- forecast(small_fit2, h=90)
plot(small_fcast2)
small_fcast3 <- forecast(small_fit3, h=90)
plot(small_fcast3)
small_fcast4 <- forecast(small_fit4, h=90)
plot(small_fcast4)
small_fcast5 <- forecast(small_fit5, h=90)
plot(small_fcast5)
small_fcast6 <- forecast(small_fit6, h=90)
plot(small_fcast6)
Of all the models tested, small_fit, small_fit2, and small_fit3 appear to give the best predictions for the sales.