Question 1: Display Data Analysis
# Attach the packages the rest of the script relies on:
# ggplot2 for the plots, dplyr for %>% / group_by() / summarize()
library(ggplot2)
library(dplyr)

# Import the display-campaign dataset (path is relative to the working directory)
display_data <- read.csv("Display_data.csv")
# Inspect column names, types and the first few values of each column
str(display_data)
## 'data.frame': 29 obs. of 8 variables:
## $ spend : num 22.6 37.3 55.6 45.4 50.2 ...
## $ clicks : int 165 228 291 247 290 172 68 112 306 300 ...
## $ impressions : int 8672 11875 14631 11709 14768 8698 2924 5919 14789 14818 ...
## $ display : int 0 0 0 0 0 0 0 0 0 0 ...
## $ transactions: int 2 2 3 2 3 2 1 1 3 3 ...
## $ revenue : num 58.9 44.9 141.6 209.8 197.7 ...
## $ ctr : num 1.9 1.92 1.99 2.11 1.96 1.98 2.33 1.89 2.07 2.02 ...
## $ con_rate : num 1.21 0.88 1.03 0.81 1.03 1.16 1.47 0.89 0.98 1 ...
# Per-column summary statistics (min, quartiles, median, mean, max)
summary(display_data)
## spend clicks impressions display
## Min. : 1.12 Min. : 48.0 Min. : 1862 Min. :0.0000
## 1st Qu.:28.73 1st Qu.:172.0 1st Qu.: 6048 1st Qu.:0.0000
## Median :39.68 Median :241.0 Median : 9934 Median :0.0000
## Mean :44.22 Mean :257.1 Mean :11858 Mean :0.3103
## 3rd Qu.:55.57 3rd Qu.:303.0 3rd Qu.:14789 3rd Qu.:1.0000
## Max. :91.28 Max. :593.0 Max. :29324 Max. :1.0000
## transactions revenue ctr con_rate
## Min. :1.000 Min. : 16.16 Min. :1.890 Min. :0.810
## 1st Qu.:2.000 1st Qu.:117.32 1st Qu.:1.970 1st Qu.:0.990
## Median :3.000 Median :235.16 Median :2.020 Median :1.130
## Mean :2.966 Mean :223.50 Mean :2.306 Mean :1.227
## 3rd Qu.:4.000 3rd Qu.:298.92 3rd Qu.:2.790 3rd Qu.:1.470
## Max. :6.000 Max. :522.00 Max. :3.290 Max. :2.080
# Check for missing values: total number of NA cells across the whole data frame
sum(is.na(display_data))
## [1] 0
# Scatter plot of revenue against ad spend. Points are coloured by the
# display flag, and a least-squares line with its confidence band is
# overlaid via geom_smooth(method = "lm").
ggplot(
  data = display_data,
  mapping = aes(x = spend, y = revenue, color = factor(display))
) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    title = "Relationship between Ad Spend and Revenue",
    x = "Ad Spend ($)",
    y = "Revenue ($)",
    color = "Display Campaign"
  ) +
  theme_minimal()

# Simple linear regression of revenue on ad spend (single predictor).
# The fit is kept in `model1`; summary() prints the residual quantiles,
# the coefficient table, R-squared and the overall F-test.
model1 <- lm(revenue ~ spend, data = display_data)
summary(model1)
##
## Call:
## lm(formula = revenue ~ spend, data = display_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -145.210 -54.647 1.117 67.780 149.476
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.9397 37.9668 0.288 0.775
## spend 4.8066 0.7775 6.182 1.31e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 86.71 on 27 degrees of freedom
## Multiple R-squared: 0.586, Adjusted R-squared: 0.5707
## F-statistic: 38.22 on 1 and 27 DF, p-value: 1.311e-06
Question 2: AB Testing Analysis
# Load the A/B test dataset (path is relative to the working directory)
ab_data <- read.csv(file = "ab_testing1.csv")
# Inspect column names, types and the first few values of each column
str(object = ab_data)
## 'data.frame': 29 obs. of 2 variables:
## $ Ads : int 1 0 2 0 1 1 2 2 2 0 ...
## $ Purchase: int 152 21 77 65 183 87 121 104 116 82 ...
# Per-column summary statistics; Ads is still an integer code at this point
summary(ab_data)
## Ads Purchase
## Min. :0.000 Min. : 14.00
## 1st Qu.:0.000 1st Qu.: 51.00
## Median :1.000 Median : 77.00
## Mean :1.069 Mean : 76.07
## 3rd Qu.:2.000 3rd Qu.:104.00
## Max. :2.000 Max. :183.00
# Treat the ad version as a categorical variable rather than a number
ab_data[["Ads"]] <- factor(ab_data[["Ads"]])

# Descriptive statistics of Purchase for each ad version:
# group size, mean, standard deviation and range
group_stats <- summarize(
  group_by(ab_data, Ads),
  count = n(),
  mean = mean(Purchase),
  sd = sd(Purchase),
  min = min(Purchase),
  max = max(Purchase)
)
print(group_stats)
## # A tibble: 3 × 6
## Ads count mean sd min max
## <fct> <int> <dbl> <dbl> <int> <int>
## 1 0 10 49 27.2 21 85
## 2 1 7 119. 40.3 61 183
## 3 2 12 73.8 31.1 14 121
# Boxplot of Purchase for each ad version, one filled box per level of Ads
ggplot(data = ab_data, mapping = aes(x = Ads, y = Purchase, fill = Ads)) +
  geom_boxplot() +
  labs(
    title = "Purchase Amount by Advertisement Version",
    x = "Advertisement Version (0 = Control, 1 = Version 1, 2 = Version 2)",
    y = "Purchase Amount ($)"
  ) +
  theme_minimal()

# Regress Purchase on the ad version. Ads is a factor, so lm() dummy-codes it
# with the first level ("0") as the baseline: the intercept is the mean of
# group 0, and Ads1 / Ads2 are the differences of groups 1 and 2 from it.
model_ab <- lm(Purchase ~ Ads, data = ab_data)
summary(model_ab)
##
## Call:
## lm(formula = Purchase ~ Ads, data = ab_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -59.75 -22.75 -3.75 30.25 64.29
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 49.00 10.21 4.800 5.69e-05 ***
## Ads1 69.71 15.91 4.383 0.000171 ***
## Ads2 24.75 13.82 1.791 0.084982 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 32.28 on 26 degrees of freedom
## Multiple R-squared: 0.4262, Adjusted R-squared: 0.3821
## F-statistic: 9.656 on 2 and 26 DF, p-value: 0.0007308
# One-way ANOVA: tests whether mean Purchase differs across the ad versions.
# The fit is kept in `anova_result`; summary() prints the ANOVA table.
anova_result <- aov(Purchase ~ Ads, data = ab_data)
summary(anova_result)
## Df Sum Sq Mean Sq F value Pr(>F)
## Ads 2 20122 10061 9.656 0.000731 ***
## Residuals 26 27090 1042
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Post-hoc comparison: pooled-SD t-tests for every pair of ad versions with
# Bonferroni-adjusted p-values (this includes the Ad 1 vs Ad 2 comparison)
pairwise_comparison <- pairwise.t.test(ab_data$Purchase, ab_data$Ads, p.adjust.method = "bonferroni")
print(pairwise_comparison)
##
## Pairwise comparisons using t tests with pooled SD
##
## data: ab_data$Purchase and ab_data$Ads
##
## 0 1
## 1 0.00051 -
## 2 0.25495 0.02096
##
## P value adjustment method: bonferroni
# Mean and standard-error bounds of a numeric vector, returned in the shape
# used by stat_summary(fun.data = ...) for error bars.
# NOTE: ggplot2 exports its own mean_se() with the same contract; this local
# definition shadows it while ggplot2 is attached.
#
# x    : numeric vector; NA values are dropped before computing.
# mult : multiplier applied to the standard error (default 1, i.e. +/- 1 SE).
# Returns a one-row data.frame with columns y (mean), ymin and ymax.
mean_se <- function(x, mult = 1) {
  x <- x[!is.na(x)]
  center <- mean(x)
  # Standard error of the mean: sd / sqrt(n), computed once and reused
  half_width <- mult * sd(x) / sqrt(length(x))
  data.frame(
    y = center,
    ymin = center - half_width,
    ymax = center + half_width
  )
}
# Bar chart of the mean Purchase per ad version; the error bars are produced
# by the mean_se() helper (mean +/- one standard error)
ggplot(data = ab_data, mapping = aes(x = Ads, y = Purchase, fill = Ads)) +
  stat_summary(geom = "bar", fun = mean) +
  stat_summary(geom = "errorbar", fun.data = mean_se, width = 0.2) +
  labs(
    title = "Mean Purchase Amount by Advertisement Version",
    x = "Advertisement Version (0 = Control, 1 = Version 1, 2 = Version 2)",
    y = "Mean Purchase Amount ($)"
  ) +
  theme_minimal()

#Q1 Describe your hypothesis (or hypotheses if you have more than one).
H0: Higher advertising spend does not lead to higher revenue.
H1: Higher advertising spend leads to higher revenue.
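#Q3 Explain your outcome and make managerial recommendations.
In the simple regression of revenue on spend, the "spend" coefficient is positive and highly significant (estimate = 4.81, p < 0.001). This means that higher ad spend is associated with higher revenue: each additional dollar of ad spend goes with about $4.81 more revenue, so ad spend has a positive impact.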
#Q1 Describe your hypothesis (or hypotheses if you have more than one).
H0: There is no significant difference in mean purchase amount between the three ad groups.
H1: At least one ad version leads to a higher mean purchase amount.
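#Q2 Explain your outcome and make managerial recommendations.
The results show that Ad version 1 had a significant positive effect on purchases compared with the control group (estimate = 69.71, p < 0.001).
Ad version 2 showed no significant increase in purchases compared with the control group (estimate = 24.75, p = 0.085).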