Executive Summary

This analysis investigates how advertising budgets, market size, and holiday weeks influence weekly sales performance for an e-commerce company. Using linear regression models in R, we evaluated both simple and multiple regression approaches to determine actionable insights.

Key Findings

Business Recommendations

Load and Prepare Data

# Read the weekly sales records from disk.
sales_data <- read.csv(file = "e_commerce_sales_data.csv")

# MarketSize and HolidayWeek are categorical predictors; store them as factors
# so the regression models below treat them as groups rather than raw text.
sales_data <- transform(
  sales_data,
  MarketSize = as.factor(MarketSize),
  HolidayWeek = as.factor(HolidayWeek)
)

# Print the column types to confirm the conversion.
str(sales_data)
## 'data.frame':    52 obs. of  5 variables:
##  $ Week       : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Advertising: num  28.8 22 24.9 31.2 29.3 ...
##  $ MarketSize : Factor w/ 3 levels "Large","Medium",..: 2 3 2 1 3 2 1 3 1 3 ...
##  $ HolidayWeek: Factor w/ 2 levels "No","Yes": 2 1 2 1 2 2 1 1 2 2 ...
##  $ Sales      : num  69.3 46.6 60.2 68.3 60.8 ...
# Per-column overview: quartiles and mean for numeric columns, level counts
# for the factor columns.
summary(sales_data)
##       Week        Advertising     MarketSize HolidayWeek     Sales      
##  Min.   : 1.00   Min.   : 7.24   Large :25   No :24      Min.   :17.01  
##  1st Qu.:13.75   1st Qu.:17.16   Medium:12   Yes:28      1st Qu.:37.86  
##  Median :26.50   Median :20.75   Small :15               Median :47.27  
##  Mean   :26.50   Mean   :20.63                           Mean   :46.23  
##  3rd Qu.:39.25   3rd Qu.:24.43                           3rd Qu.:53.84  
##  Max.   :52.00   Max.   :31.35                           Max.   :77.11

Exploratory Data Analysis

# Scatter of weekly sales against advertising spend, overlaid with the
# least-squares line (confidence band switched off).
ggplot(data = sales_data, mapping = aes(x = Advertising, y = Sales)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  labs(
    title = "Sales vs Advertising Budget",
    x = "Advertising (in $1000s)",
    y = "Sales (in $1000s)"
  )
## `geom_smooth()` using formula = 'y ~ x'

Simple Linear Regression

# Baseline model: weekly sales explained by advertising spend alone.
model_simple <- lm(
  formula = Sales ~ Advertising,
  data = sales_data
)

# Coefficient table, residual summary and overall fit statistics.
summary(model_simple)
## 
## Call:
## lm(formula = Sales ~ Advertising, data = sales_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -25.0493  -5.8952   0.7356   7.4668  16.7441 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   8.8572     4.8823   1.814   0.0757 .  
## Advertising   1.8121     0.2285   7.929 2.14e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.173 on 50 degrees of freedom
## Multiple R-squared:  0.557,  Adjusted R-squared:  0.5482 
## F-statistic: 62.88 on 1 and 50 DF,  p-value: 2.139e-10

Multiple Linear Regression

# Extended model: advertising spend plus the two categorical predictors.
# Factor terms are reported relative to their first level.
model_multiple <- lm(
  formula = Sales ~ Advertising + MarketSize + HolidayWeek,
  data = sales_data
)

# Coefficient table, residual summary and overall fit statistics.
summary(model_multiple)
## 
## Call:
## lm(formula = Sales ~ Advertising + MarketSize + HolidayWeek, 
##     data = sales_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -25.237  -5.153   1.178   5.034  14.213 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        6.0106     4.8336   1.244  0.21985    
## Advertising        1.9287     0.2178   8.855 1.39e-11 ***
## MarketSizeMedium  -6.7073     3.5317  -1.899  0.06369 .  
## MarketSizeSmall   -8.8684     3.2713  -2.711  0.00934 ** 
## HolidayWeekYes     8.4428     2.9542   2.858  0.00634 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.571 on 47 degrees of freedom
## Multiple R-squared:  0.6364, Adjusted R-squared:  0.6055 
## F-statistic: 20.57 on 4 and 47 DF,  p-value: 7.537e-10

Model Evaluation

# R-squared of the advertising-only model.
cat("Simple Model R-squared: ", summary(model_simple)[["r.squared"]], "\n")
## Simple Model R-squared:  0.557034
# R-squared of the model with all three predictors.
cat("Multiple Model R-squared: ", summary(model_multiple)[["r.squared"]], "\n")
## Multiple Model R-squared:  0.6364134
# Adjusted R-squared of the multiple model, which accounts for the extra terms.
cat("Multiple Model Adjusted R-squared: ", summary(model_multiple)[["adj.r.squared"]], "\n")
## Multiple Model Adjusted R-squared:  0.6054699

Visualize Predictions

# Attach the multiple-regression prediction to every row. Passing `newdata`
# keeps the result the same length as sales_data (NA where a predictor is
# missing), so the assignment cannot fail on a length mismatch if lm() dropped
# incomplete rows while fitting.
sales_data$PredictedSales <- predict(model_multiple, newdata = sales_data)

# Observed sales (points) with the fitted values (dashed lines), coloured by
# market size. The fitted values depend on HolidayWeek as well as MarketSize,
# so the lines are grouped by both factors: one straight line per
# MarketSize x HolidayWeek combination. Grouping by colour alone would join
# holiday and non-holiday predictions into a single zig-zag path.
ggplot(sales_data, aes(x = Advertising, y = Sales, color = MarketSize)) +
  geom_point() +
  geom_line(
    aes(y = PredictedSales, group = interaction(MarketSize, HolidayWeek)),
    linetype = "dashed"
  ) +
  labs(
    title = "Multiple Regression Predictions by Market Size",
    x = "Advertising",
    y = "Sales"
  )

Save Predicted Sales

# Export week, observed sales and model predictions side by side.
# Row names are omitted so the file contains only the three data columns.
write.csv(
  x = with(
    sales_data,
    data.frame(
      Week = Week,
      ActualSales = Sales,
      PredictedSales = PredictedSales
    )
  ),
  file = "predicted_sales_results.csv",
  row.names = FALSE
)