# Read the weekly sales records from "sales_data.csv" (path is relative to the
# current working directory) into a data frame.
sales_data <- read.csv(file = "sales_data.csv")
# Print a compact overview: one line per column with its type and first values.
str(object = sales_data)
## 'data.frame': 52 obs. of 5 variables:
## $ Week : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Promotion : chr "Yes" "No" "No" "Yes" ...
## $ Holiday : chr "No" "No" "No" "Yes" ...
## $ CompetitorActivity: chr "High" "Low" "High" "High" ...
## $ Sales : num 47.8 45.8 60 54.3 43.5 ...
# Turn the three character columns into factors in one pass so that plotting
# and modelling treat them as categorical variables.
# NOTE(review): as.factor() orders levels alphabetically, so CompetitorActivity
# becomes High < Low < Medium with "High" as the reference level. Confirm that
# this ordering is intended before relying on plot order or model baselines.
sales_data[c("Promotion", "Holiday", "CompetitorActivity")] <- lapply(
  sales_data[c("Promotion", "Holiday", "CompetitorActivity")],
  as.factor
)
# Count the NA entries in each column; all zeros means no missing values.
vapply(sales_data, function(column) sum(is.na(column)), numeric(1))
## Week Promotion Holiday CompetitorActivity
## 0 0 0 0
## Sales
## 0
# Per-column summary: quartiles for numeric columns, level counts for factors.
summary(object = sales_data)
## Week Promotion Holiday CompetitorActivity Sales
## Min. : 1.00 No :31 No :27 High :16 Min. :26.62
## 1st Qu.:13.75 Yes:21 Yes:25 Low :20 1st Qu.:42.34
## Median :26.50 Medium:16 Median :48.33
## Mean :26.50 Mean :49.58
## 3rd Qu.:39.25 3rd Qu.:55.41
## Max. :52.00 Max. :73.28
# Attach ggplot2 so that ggplot(), aes(), geom_line(), geom_boxplot() and
# labs() are available; nothing earlier in this script loads the package.
# Calling library() again is harmless if it is already attached.
library(ggplot2)

# Weekly sales as a time series.
ggplot(sales_data, aes(x = Week, y = Sales)) +
  geom_line(color = "steelblue") +
  labs(title = "Weekly Sales Over Time", x = "Week", y = "Sales (in $1000s)")
# Promotion
# The fill colour repeats the x-axis grouping, so the legend is suppressed.
# The y axis carries the same unit label as the time-series plot above.
ggplot(sales_data, aes(x = Promotion, y = Sales, fill = Promotion)) +
  geom_boxplot(show.legend = FALSE) +
  labs(title = "Sales During vs Outside Promotions", y = "Sales (in $1000s)")
# Holiday
ggplot(sales_data, aes(x = Holiday, y = Sales, fill = Holiday)) +
  geom_boxplot(show.legend = FALSE) +
  labs(
    title = "Sales During Holidays vs Non-Holidays",
    y = "Sales (in $1000s)"
  )
# Competitor Activity
ggplot(
  sales_data,
  aes(x = CompetitorActivity, y = Sales, fill = CompetitorActivity)
) +
  geom_boxplot(show.legend = FALSE) +
  labs(title = "Sales vs Competitor Activity Level", y = "Sales (in $1000s)")
# Fit an additive linear regression of Sales on the three categorical
# predictors (main effects only, no interaction terms).
model <- lm(
  formula = Sales ~ Promotion + Holiday + CompetitorActivity,
  data = sales_data
)
# Print coefficient estimates, significance tests and overall fit statistics.
# The coefficient names below (PromotionYes, HolidayYes, CompetitorActivityLow,
# CompetitorActivityMedium) show that the intercept is the baseline of
# Promotion = "No", Holiday = "No" and CompetitorActivity = "High".
summary(object = model)
##
## Call:
## lm(formula = Sales ~ Promotion + Holiday + CompetitorActivity,
## data = sales_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.2811 -7.6153 -0.8176 5.8278 23.7119
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 47.075 3.039 15.488 <2e-16 ***
## PromotionYes 7.070 2.926 2.416 0.0196 *
## HolidayYes -0.687 2.903 -0.237 0.8140
## CompetitorActivityLow 2.494 3.473 0.718 0.4764
## CompetitorActivityMedium -3.188 3.687 -0.865 0.3916
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.29 on 47 degrees of freedom
## Multiple R-squared: 0.154, Adjusted R-squared: 0.08203
## F-statistic: 2.139 on 4 and 47 DF, p-value: 0.09067
# Mean squared error of the model's predictions on `sales_data` itself. The
# model was fit to this same data frame, so this is an in-sample (training)
# error rather than an estimate of out-of-sample accuracy.
predicted_sales <- predict(object = model, newdata = sales_data)
mse <- mean((sales_data[["Sales"]] - predicted_sales)^2)
mse
## [1] 95.7844
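# Based on our analysis:
# - Promotion is the only statistically significant predictor: promotional
#   weeks are associated with about 7.07 (in $1000s) higher sales (p = 0.0196).
# - The Holiday and CompetitorActivity effects are not statistically
#   significant (p = 0.81, 0.48 and 0.39).
# - The model explains little of the variation in sales (R-squared = 0.154,
#   overall F-test p = 0.09), so these conclusions are tentative.
# Recommendation: The marketing team should continue leveraging promotions.
# Running them during low competitor activity periods may further maximize
# effectiveness, but the current model cannot confirm this because it has no
# Promotion-by-CompetitorActivity interaction term. A more complex model may
# be explored later to better capture nuanced effects and interaction terms.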