# Install pacman if needed
if (!require("pacman")) install.packages("pacman")
## Loading required package: pacman
# load packages
pacman::p_load(pacman,
  tidyverse, openxlsx)
oreos <- read.xlsx("datasets/Oreos.xlsx", skipEmptyRows = TRUE)

Inspect data

head(oreos)
ggplot(data = oreos, aes(x=factor(Height.in.feet), y = Sales)) + geom_boxplot() + theme_minimal()

Build Regression Model

# Sales as explained by height in feet; must specify dataset
oreo_model_lm <-  lm(Sales ~ factor(Height.in.feet), data = oreos) 

Summary output

#Print model summary
oreo_model_lm %>% summary()
## 
## Call:
## lm(formula = Sales ~ factor(Height.in.feet), data = oreos)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -7.500 -2.812 -0.250  3.500  7.500 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               28.500      2.618  10.886 1.76e-06 ***
## factor(Height.in.feet)6   34.000      3.702   9.183 7.24e-06 ***
## factor(Height.in.feet)7   15.750      3.702   4.254  0.00213 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.236 on 9 degrees of freedom
## Multiple R-squared:  0.9037, Adjusted R-squared:  0.8823 
## F-statistic: 42.24 on 2 and 9 DF,  p-value: 2.666e-05

A positive coefficient indicates that as the value of the independent variable increases (height), the mean of the dependent variable (sales) also tends to increase.

At 5 feet sales are 28. Now we interpret 6 and 7 feet in relation to the 5 feet shelf location.

At 6 feet sales increase by 34 units, but at 7 feet sales are only increased by 15 units in relation 5 feet. Overall, 6 feet is better location for our Oreos.

p-values are <.05 for both independent variables

How do we write our equation if we want to use this linear model to forecast sales?

Sales = 28.5 + 34(6 feet) + 15.75(7 feet)

Luckily, for us we can use R to do the math for us.

  explan_oreos <- tibble(oreos)
  
  head(explan_oreos)
#Call predict - vector of predictions
predict(oreo_model_lm, explan_oreos)
##     1     2     3     4     5     6     7     8     9    10    11    12 
## 28.50 62.50 44.25 28.50 28.50 62.50 62.50 44.25 44.25 28.50 62.50 44.25
#Put predictions inside a data frame
prediction_oreo <- explan_oreos  %>% 
  mutate(forecast_sales = predict(oreo_model_lm, explan_oreos))

#Add residuals as a column
prediction_oreo <- prediction_oreo %>% 
  mutate(errors = forecast_sales - Sales)

#check results
head(prediction_oreo)

Build forecast

prediction_oreo %>% 
  group_by(Height.in.feet) %>% 
  summarize(avg_sales = mean(forecast_sales),
            sum_sales = sum(forecast_sales))