1 1. Data Preparation

# Read the sales records from the CSV file (path is relative to the working dir)
sales_data <- read.csv(file = "sales_data.csv")

# Show each column's name, type and leading values
str(object = sales_data)
## 'data.frame':    52 obs. of  5 variables:
##  $ Week              : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Promotion         : chr  "Yes" "No" "No" "Yes" ...
##  $ Holiday           : chr  "No" "No" "No" "Yes" ...
##  $ CompetitorActivity: chr  "High" "Low" "High" "High" ...
##  $ Sales             : num  47.8 45.8 60 54.3 43.5 ...
# Turn the three text columns into factors so they are treated as categorical
categorical_cols <- c("Promotion", "Holiday", "CompetitorActivity")
sales_data[categorical_cols] <- lapply(sales_data[categorical_cols], as.factor)

# Count the NA entries in every column
vapply(sales_data, function(column) sum(is.na(column)), numeric(1))
##               Week          Promotion            Holiday CompetitorActivity 
##                  0                  0                  0                  0 
##              Sales 
##                  0

2 2. Exploratory Data Analysis

2.1 2.1 Summary Statistics

# Per-column summary: quantiles for numeric columns, level counts for factors
summary(object = sales_data)
##       Week       Promotion Holiday  CompetitorActivity     Sales      
##  Min.   : 1.00   No :31    No :27   High  :16          Min.   :26.62  
##  1st Qu.:13.75   Yes:21    Yes:25   Low   :20          1st Qu.:42.34  
##  Median :26.50                      Medium:16          Median :48.33  
##  Mean   :26.50                                         Mean   :49.58  
##  3rd Qu.:39.25                                         3rd Qu.:55.41  
##  Max.   :52.00                                         Max.   :73.28

2.3 2.3 Boxplots: Sales vs Categorical Variables

# Sales distribution split by promotion status
ggplot(
  data = sales_data,
  mapping = aes(x = Promotion, y = Sales, fill = Promotion)
) +
  geom_boxplot() +
  ggtitle("Sales During vs Outside Promotions")

# Sales distribution split by holiday status
ggplot(
  data = sales_data,
  mapping = aes(x = Holiday, y = Sales, fill = Holiday)
) +
  geom_boxplot() +
  ggtitle("Sales During Holidays vs Non-Holidays")

# Sales distribution split by competitor activity level
ggplot(
  data = sales_data,
  mapping = aes(x = CompetitorActivity, y = Sales, fill = CompetitorActivity)
) +
  geom_boxplot() +
  ggtitle("Sales vs Competitor Activity Level")

3 3. Predictive Modeling

3.1 3.1 Linear Regression Model

# Fit an ordinary least squares model of Sales on the three categorical factors
model <- lm(
  formula = Sales ~ Promotion + Holiday + CompetitorActivity,
  data = sales_data
)

# Print the coefficient table, residual summary and overall fit statistics
summary(object = model)
## 
## Call:
## lm(formula = Sales ~ Promotion + Holiday + CompetitorActivity, 
##     data = sales_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -19.2811  -7.6153  -0.8176   5.8278  23.7119 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                47.075      3.039  15.488   <2e-16 ***
## PromotionYes                7.070      2.926   2.416   0.0196 *  
## HolidayYes                 -0.687      2.903  -0.237   0.8140    
## CompetitorActivityLow       2.494      3.473   0.718   0.4764    
## CompetitorActivityMedium   -3.188      3.687  -0.865   0.3916    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.29 on 47 degrees of freedom
## Multiple R-squared:  0.154,  Adjusted R-squared:  0.08203 
## F-statistic: 2.139 on 4 and 47 DF,  p-value: 0.09067

3.2 3.2 Interpretation

  • The coefficients tell us the expected change in sales due to each predictor.
  • A p-value below 0.05 for a coefficient indicates that its effect on sales is statistically significant at the 5% level; here only PromotionYes (p = 0.0196) meets that threshold.
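  • The intercept represents the baseline weekly sales with no promotion, no holiday, and high competitor activity (the reference levels of the three factors; "High" is the reference because factor levels are ordered alphabetically).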

4 4. Model Evaluation

# In-sample mean squared error: predict on the same rows used to fit the model
predicted_sales <- predict(model, newdata = sales_data)
mse <- mean((predicted_sales - sales_data$Sales)^2)
mse
## [1] 95.7844

4.1 4.1 Bias-Variance Discussion

  • This linear model is interpretable but potentially underfits if relationships are non-linear or interactions exist.
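  • A low MSE would indicate good model fit; here the in-sample MSE of 95.78 (RMSE ≈ 9.79), together with an R-squared of 0.154, shows that the model explains only a small share of the variation in sales. Since this model is relatively simple, it may have high bias and low variance, and because the MSE is computed on the same data used for fitting, it likely understates the out-of-sample error.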
  • Future improvements may include interaction terms or polynomial features.

5 5. Conclusion

Based on our analysis:

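Recommendation: The marketing team should continue leveraging promotions, the only predictor with a statistically significant effect in this model (about +7.07 in weekly sales, p ≈ 0.02). Concentrating promotions in low competitor activity periods may maximize effectiveness, but this model does not test that: it has no Promotion × CompetitorActivity interaction, and the competitor activity coefficients are not significant. A more complex model may be explored later to better capture nuanced effects and interaction terms.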