# Install pacman if needed
if (!require("pacman")) install.packages("pacman")
## Loading required package: pacman
# load packages
pacman::p_load(pacman,
tidyverse, openxlsx)
oreos <- read.xlsx("datasets/Oreos.xlsx", skipEmptyRows = TRUE)
head(oreos)
ggplot(data = oreos, aes(x=factor(Height.in.feet), y = Sales)) + geom_boxplot() + theme_minimal()
# Sales as explained by height in feet; must specify dataset
oreo_model_lm <- lm(Sales ~ factor(Height.in.feet), data = oreos)
#Print model summary
oreo_model_lm %>% summary()
##
## Call:
## lm(formula = Sales ~ factor(Height.in.feet), data = oreos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.500 -2.812 -0.250 3.500 7.500
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 28.500 2.618 10.886 1.76e-06 ***
## factor(Height.in.feet)6 34.000 3.702 9.183 7.24e-06 ***
## factor(Height.in.feet)7 15.750 3.702 4.254 0.00213 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.236 on 9 degrees of freedom
## Multiple R-squared: 0.9037, Adjusted R-squared: 0.8823
## F-statistic: 42.24 on 2 and 9 DF, p-value: 2.666e-05
A positive coefficient indicates that as the value of the independent variable increases (height), the mean of the dependent variable (sales) also tends to increase.
At 5 feet sales are 28. Now we interpret 6 and 7 feet in relation to the 5 feet shelf location.
At 6 feet sales increase by 34 units, but at 7 feet sales are only increased by 15 units in relation 5 feet. Overall, 6 feet is better location for our Oreos.
p-values are <.05 for both independent variables
How do we write our equation if we want to use this linear model to forecast sales?
Sales = 28.5 + 34(6 feet) + 15.75(7 feet)
Luckily, for us we can use R to do the math for us.
explan_oreos <- tibble(oreos)
head(explan_oreos)
#Call predict - vector of predictions
predict(oreo_model_lm, explan_oreos)
## 1 2 3 4 5 6 7 8 9 10 11 12
## 28.50 62.50 44.25 28.50 28.50 62.50 62.50 44.25 44.25 28.50 62.50 44.25
#Put predictions inside a data frame
prediction_oreo <- explan_oreos %>%
mutate(forecast_sales = predict(oreo_model_lm, explan_oreos))
#Add residuals as a column
prediction_oreo <- prediction_oreo %>%
mutate(errors = forecast_sales - Sales)
#check results
head(prediction_oreo)
prediction_oreo %>%
group_by(Height.in.feet) %>%
summarize(avg_sales = mean(forecast_sales),
sum_sales = sum(forecast_sales))