Load required libraries
cat("Loading required libraries...\n")
## Loading required libraries...
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Read the cleaned marketing data from the exported CSV file
# Announce the load step, then read the cleaned marketing data from disk.
cat("Reading cleaned marketing data from CSV...\n")
## Reading cleaned marketing data from CSV...
cleaned_df <- read.csv(file = "marketing_cleaned_data.csv")
# Report the dimensions (rows, columns) of the freshly loaded data frame.
cat(
  "Data loaded. Rows:", dim(cleaned_df)[1L],
  "Columns:", dim(cleaned_df)[2L],
  "\n\n"
)
## Data loaded. Rows: 100 Columns: 5
Remove rows with missing values, then trim conversion_rate outliers (keep the 1st–99th percentile range)
# Drop rows containing any NA, then keep only the observations whose
# conversion_rate lies between its 1st and 99th percentiles (inclusive).
cat("Cleaning data: removing NAs and outliers in conversion_rate...\n")
## Cleaning data: removing NAs and outliers in conversion_rate...
data_r <- na.omit(cleaned_df)
q_low <- quantile(data_r$conversion_rate, probs = 0.01)
q_high <- quantile(data_r$conversion_rate, probs = 0.99)
# na.omit() left no NAs behind, so logical row indexing picks exactly the
# rows subset() would; drop = FALSE keeps the result a data frame.
data_r <- data_r[
  data_r$conversion_rate >= q_low & data_r$conversion_rate <= q_high, ,
  drop = FALSE
]
cat("Data cleaned. Remaining rows:", nrow(data_r), "\n\n")
## Data cleaned. Remaining rows: 98
CHANNEL-WISE MEAN CONVERSION RATES
# Per-channel average conversion rate together with the number of
# observations behind each average.
group_means <- data_r %>%
  group_by(channel) %>%
  summarise(
    mean_conversion = mean(conversion_rate),
    n = n()
  )
group_means
## # A tibble: 4 × 3
## channel mean_conversion n
## <chr> <dbl> <int>
## 1 Digital 0.0106 33
## 2 Print 0.0144 19
## 3 Radio 0.0131 26
## 4 TV 0.0137 20
# Short reading guide printed beneath the table.
cat("\nInterpretation: The table above shows the average conversion rate and sample size for each marketing channel.\n\n")
##
## Interpretation: The table above shows the average conversion rate and sample size for each marketing channel.
ANOVA: Are conversion rates significantly different across channels?
# One-way ANOVA with conversion_rate as the response and channel as the
# single grouping factor; the summary prints the F statistic and p-value.
anova_res <- aov(formula = conversion_rate ~ channel, data = data_r)
summary(anova_res)
## Df Sum Sq Mean Sq F value Pr(>F)
## channel 3 0.000225 7.500e-05 1.082 0.361
## Residuals 94 0.006519 6.935e-05
# Generic guidance on how to read the p-value reported above.
cat("\nInterpretation: If the p-value is < 0.05, there is a statistically significant difference in conversion rates between at least two channels.\n\n")
##
## Interpretation: If the p-value is < 0.05, there is a statistically significant difference in conversion rates between at least two channels.
POST-HOC TUKEY HSD TEST (if ANOVA is significant)
# Pairwise follow-up is only meaningful after a significant omnibus test.
# The first entry of the "Pr(>F)" column is the p-value of the channel term.
if (summary(anova_res)[[1]][["Pr(>F)"]][1] >= 0.05) {
  # Nothing to follow up on: report the non-significant result and stop.
  cat("\nNo significant differences found between channels (ANOVA p >= 0.05).\n\n")
} else {
  # Significant ANOVA: run Tukey's HSD on the fitted model and print it.
  tukey <- TukeyHSD(anova_res)
  print(tukey)
  cat("\nInterpretation: The Tukey HSD test identifies which specific channels differ from each other.\n\n")
}
##
## No significant differences found between channels (ANOVA p >= 0.05).
VISUALIZATION: Conversion Rate by Channel
# Boxplot of conversion rate per channel with the raw observations overlaid.
ggplot(data_r, aes(x = channel, y = conversion_rate, fill = channel)) +
  # Boxplot outlier markers are suppressed because every observation is
  # already drawn by the jitter layer below.
  geom_boxplot(alpha = 0.7, outlier.shape = NA) +
  # Jitter horizontally only. Without height = 0, geom_jitter() also shifts
  # points vertically, so they would no longer sit at their true
  # conversion_rate values.
  geom_jitter(width = 0.2, height = 0, alpha = 0.4, color = "black") +
  labs(
    title = "Conversion Rate by Channel",
    x = "Channel",
    y = "Conversion Rate"
  ) +
  theme_minimal()

# Short reading guide printed beneath the plot.
cat("\nInterpretation: The boxplot above visualizes the spread and central tendency of conversion rates for each channel.\n\n")
##
## Interpretation: The boxplot above visualizes the spread and central tendency of conversion rates for each channel.
VISUALIZATION: Density Plot of Conversion Rates
# Overlaid, semi-transparent density curves of conversion rate, one per
# channel, so the overlap between channels is visible.
ggplot(data = data_r, mapping = aes(x = conversion_rate, fill = channel)) +
  geom_density(alpha = 0.4) +
  labs(
    title = "Density of Conversion Rate by Channel",
    x = "Conversion Rate",
    y = "Density"
  ) +
  theme_minimal()

# Short reading guide printed beneath the plot.
cat("\nInterpretation: The density plot shows the distribution and overlap of conversion rates across channels.\n\n")
##
## Interpretation: The density plot shows the distribution and overlap of conversion rates across channels.
SUMMARY STATISTICS BY CHANNEL
# Descriptive statistics of conversion_rate for each channel: centre
# (mean, median), spread (sd, min, max) and group size (n).
sum_stats <- data_r %>%
  group_by(channel) %>%
  summarise(
    mean = mean(conversion_rate),
    median = median(conversion_rate),
    sd = sd(conversion_rate),
    min = min(conversion_rate),
    max = max(conversion_rate),
    n = n()
  )
sum_stats
## # A tibble: 4 × 7
## channel mean median sd min max n
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
## 1 Digital 0.0106 0.00958 0.00704 0.00215 0.0334 33
## 2 Print 0.0144 0.0124 0.00850 0.00265 0.0336 19
## 3 Radio 0.0131 0.0105 0.00925 0.00155 0.0387 26
## 4 TV 0.0137 0.0113 0.00887 0.00385 0.0362 20
# Short reading guide printed beneath the table.
cat("\nInterpretation: Use these statistics to compare the performance and consistency of each channel.\n\n")
##
## Interpretation: Use these statistics to compare the performance and consistency of each channel.