library(tidyverse)
library(scales)
library(ggthemes)
library(knitr)
library(kableExtra)
library(gridExtra)
library(corrplot)
library(ggrepel)df <- read_csv("cleaned_social_media_advertising.csv") %>%
mutate(
acquisition_cost = as.numeric(gsub("\\$|,", "", acquisition_cost)),
date = as.Date(date),
month = floor_date(date, "month")
)tibble(
Metric = c("Total campaigns", "Channels analyzed",
"Customer segments", "Cities covered", "Time period"),
Value = c(
format(nrow(df), big.mark = ","),
n_distinct(df$channel_used),
n_distinct(df$customer_segment),
n_distinct(df$location),
paste(min(df$date), "→", max(df$date))
)
) %>%
kable(align = c("l", "r"), caption = "Dataset dimensions") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE)| Metric | Value |
|---|---|
| Total campaigns | 300,000 |
| Channels analyzed | 4 |
| Customer segments | 5 |
| Cities covered | 5 |
| Time period | 2022-01-01 → 2022-12-31 |
tibble(
KPI = c("Avg. Conversion Rate", "Avg. ROI", "Avg. Clicks",
"Avg. Impressions", "Avg. Engagement Score", "Avg. Acquisition Cost"),
Value = c(
percent(mean(df$conversion_rate), accuracy = 0.1),
as.character(round(mean(df$roi), 2)),
format(round(mean(df$clicks), 0), big.mark = ","),
format(round(mean(df$impressions), 0), big.mark = ","),
as.character(round(mean(df$engagement_score), 2)),
dollar(mean(df$acquisition_cost, na.rm = TRUE))
)
) %>%
kable(align = c("l", "r"), caption = "Global KPIs across all campaigns") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE)| KPI | Value |
|---|---|
| Avg. Conversion Rate | 8.0% |
| Avg. ROI | 3.18 |
| Avg. Clicks | 18,154 |
| Avg. Impressions | 56,034 |
| Avg. Engagement Score | 4.37 |
| Avg. Acquisition Cost | $7,753.75 |
df %>%
group_by(channel_used) %>%
summarise(
conversion_mean = mean(conversion_rate),
roi_mean = mean(roi),
n = n(),
.groups = "drop"
) %>%
ggplot(aes(x = reorder(channel_used, -conversion_mean),
y = conversion_mean, fill = channel_used)) +
geom_col(width = 0.6, show.legend = FALSE) +
geom_text(aes(label = percent(conversion_mean, accuracy = 0.1)),
vjust = -0.5, size = 4, fontface = "bold") +
scale_y_continuous(labels = percent_format(),
expand = expansion(mult = c(0, .12))) +
scale_fill_brewer(palette = "Set2") +
labs(title = "Average Conversion Rate by Channel",
subtitle = "Which platform converts best?",
x = NULL, y = "Conversion Rate") +
theme_minimal(base_size = 13) +
theme(plot.title = element_text(face = "bold"))df %>%
ggplot(aes(x = channel_used, y = roi, fill = channel_used)) +
geom_violin(alpha = 0.6, show.legend = FALSE) +
geom_boxplot(width = 0.15, fill = "white", outlier.shape = NA) +
scale_fill_brewer(palette = "Set2") +
labs(title = "ROI Distribution by Channel",
subtitle = "Violin + boxplot to show real spread",
x = NULL, y = "ROI") +
theme_minimal(base_size = 13) +
theme(plot.title = element_text(face = "bold"))df %>%
group_by(channel_used) %>%
summarise(eng_mean = mean(engagement_score), .groups = "drop") %>%
ggplot(aes(x = reorder(channel_used, eng_mean), y = eng_mean, fill = channel_used)) +
geom_col(show.legend = FALSE) +
geom_text(aes(label = round(eng_mean, 2)), hjust = -0.2, fontface = "bold") +
coord_flip() +
scale_fill_brewer(palette = "Pastel1") +
scale_y_continuous(expand = expansion(mult = c(0, .15))) +
labs(title = "Average Engagement Score by Channel",
x = NULL, y = "Engagement Score") +
theme_minimal(base_size = 13) +
theme(plot.title = element_text(face = "bold"))df %>%
group_by(campaign_goal) %>%
summarise(
conv = mean(conversion_rate),
roi = mean(roi),
.groups = "drop"
) %>%
pivot_longer(c(conv, roi), names_to = "metric", values_to = "value") %>%
mutate(metric = recode(metric, conv = "Conversion Rate", roi = "ROI")) %>%
ggplot(aes(x = reorder(campaign_goal, -value), y = value, fill = campaign_goal)) +
geom_col(show.legend = FALSE) +
facet_wrap(~metric, scales = "free_y") +
scale_fill_brewer(palette = "Set3") +
labs(title = "Conversion Rate & ROI by Campaign Goal",
x = NULL, y = NULL) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold"),
axis.text.x = element_text(angle = 20, hjust = 1)
)df %>%
group_by(channel_used, campaign_goal) %>%
summarise(roi_mean = mean(roi), .groups = "drop") %>%
ggplot(aes(x = channel_used, y = campaign_goal, fill = roi_mean)) +
geom_tile(color = "white", linewidth = 0.8) +
geom_text(aes(label = round(roi_mean, 2)), size = 4, fontface = "bold") +
scale_fill_gradient(low = "#fee8c8", high = "#d7301f", name = "Avg.\nROI") +
labs(title = "Heatmap: Average ROI — Channel × Campaign Goal",
subtitle = "Darker cells = higher return",
x = "Channel", y = "Campaign Goal") +
theme_minimal(base_size = 13) +
theme(plot.title = element_text(face = "bold"))df %>%
ggplot(aes(x = reorder(customer_segment, roi, FUN = median),
y = roi, fill = customer_segment)) +
geom_boxplot(show.legend = FALSE, outlier.alpha = 0.1) +
coord_flip() +
scale_fill_brewer(palette = "Set2") +
labs(title = "ROI Distribution by Customer Segment",
subtitle = "Which segment generates the most return?",
x = NULL, y = "ROI") +
theme_minimal(base_size = 13) +
theme(plot.title = element_text(face = "bold"))df %>%
group_by(customer_segment, channel_used) %>%
summarise(conv = mean(conversion_rate), .groups = "drop") %>%
ggplot(aes(x = channel_used, y = conv, color = customer_segment,
group = customer_segment)) +
geom_line(linewidth = 1.2) +
geom_point(size = 3) +
scale_y_continuous(labels = percent_format()) +
scale_color_brewer(palette = "Dark2", name = "Segment") +
labs(title = "Conversion Rate: Channel × Customer Segment",
subtitle = "How conversion varies per channel across segments",
x = "Channel", y = "Conversion Rate") +
theme_minimal(base_size = 13) +
theme(plot.title = element_text(face = "bold"),
legend.position = "bottom")df %>%
group_by(target_audience) %>%
summarise(
conv = mean(conversion_rate),
roi = mean(roi),
.groups = "drop"
) %>%
ggplot(aes(x = conv, y = roi, label = target_audience, color = target_audience)) +
geom_point(size = 5, show.legend = FALSE) +
geom_text_repel(size = 3.5, show.legend = FALSE, max.overlaps = 20) +
scale_x_continuous(labels = percent_format()) +
scale_color_brewer(palette = "Paired") +
labs(title = "Conversion Rate vs. ROI by Target Audience",
subtitle = "Top-right quadrant = best overall performance",
x = "Conversion Rate", y = "Average ROI") +
theme_minimal(base_size = 13) +
theme(plot.title = element_text(face = "bold"))df %>%
group_by(location) %>%
summarise(roi_mean = mean(roi), .groups = "drop") %>%
ggplot(aes(x = reorder(location, roi_mean), y = roi_mean, fill = location)) +
geom_col(show.legend = FALSE) +
geom_text(aes(label = round(roi_mean, 2)), hjust = -0.2, fontface = "bold") +
coord_flip() +
scale_fill_brewer(palette = "Blues") +
scale_y_continuous(expand = expansion(mult = c(0, .15))) +
labs(title = "Average ROI by City",
x = NULL, y = "Average ROI") +
theme_minimal(base_size = 13) +
theme(plot.title = element_text(face = "bold"))df %>%
count(language, channel_used) %>%
group_by(language) %>%
mutate(prop = n / sum(n)) %>%
ungroup() %>%
ggplot(aes(x = language, y = prop, fill = channel_used)) +
geom_col(position = "fill") +
scale_y_continuous(labels = percent_format()) +
scale_fill_brewer(palette = "Set2", name = "Channel") +
labs(title = "Channel Distribution by Campaign Language",
subtitle = "Which channel dominates in each language?",
x = "Language", y = "Proportion") +
theme_minimal(base_size = 13) +
theme(plot.title = element_text(face = "bold"),
legend.position = "bottom")df %>%
group_by(month, channel_used) %>%
summarise(roi_mean = mean(roi), .groups = "drop") %>%
ggplot(aes(x = month, y = roi_mean, color = channel_used, group = channel_used)) +
geom_line(linewidth = 1) +
geom_smooth(se = FALSE, linetype = "dashed", linewidth = 0.5) +
scale_color_brewer(palette = "Set1", name = "Channel") +
scale_x_date(date_labels = "%b %Y", date_breaks = "3 months") +
labs(title = "Monthly ROI Trend by Channel",
subtitle = "Dashed line = smoothed trend",
x = NULL, y = "Average ROI") +
theme_minimal(base_size = 13) +
theme(
plot.title = element_text(face = "bold"),
axis.text.x = element_text(angle = 30, hjust = 1),
legend.position = "bottom"
)df %>%
count(month) %>%
ggplot(aes(x = month, y = n)) +
geom_area(fill = "#3182bd", alpha = 0.4) +
geom_line(color = "#3182bd", linewidth = 1) +
scale_x_date(date_labels = "%b %Y", date_breaks = "3 months") +
scale_y_continuous(labels = comma) +
labs(title = "Monthly Campaign Volume",
x = NULL, y = "Number of Campaigns") +
theme_minimal(base_size = 13) +
theme(
plot.title = element_text(face = "bold"),
axis.text.x = element_text(angle = 30, hjust = 1)
)num_vars <- df %>%
select(conversion_rate, roi, clicks, impressions,
engagement_score, acquisition_cost) %>%
drop_na()
M <- cor(num_vars)
corrplot(M,
method = "color",
type = "upper",
addCoef.col = "black",
tl.col = "black",
tl.srt = 45,
col = colorRampPalette(c("#d73027", "#f7f7f7", "#4575b4"))(200),
title = "Correlation Matrix — Numeric Variables",
mar = c(0, 0, 2, 0))df %>%
sample_n(5000) %>%
ggplot(aes(x = clicks, y = conversion_rate, color = channel_used)) +
geom_point(alpha = 0.4, size = 1.5) +
geom_smooth(method = "lm", se = FALSE, linewidth = 1.2) +
scale_y_continuous(labels = percent_format()) +
scale_x_continuous(labels = comma) +
scale_color_brewer(palette = "Set1", name = "Channel") +
labs(title = "Clicks vs. Conversion Rate by Channel",
subtitle = "Random sample of 5,000 campaigns",
x = "Clicks", y = "Conversion Rate") +
theme_minimal(base_size = 13) +
theme(plot.title = element_text(face = "bold"),
legend.position = "bottom")df %>%
group_by(channel_used, campaign_goal) %>%
summarise(
Campaigns = n(),
`Conv. Rate` = percent(mean(conversion_rate), accuracy = 0.1),
`Avg. ROI` = round(mean(roi), 2),
`Eng. Score` = round(mean(engagement_score), 1),
.groups = "drop"
) %>%
rename(Channel = channel_used, Goal = campaign_goal) %>%
arrange(Channel, desc(`Avg. ROI`)) %>%
kable(align = c("l", "l", "r", "r", "r", "r"),
caption = "Summary Table: Channel × Campaign Goal") %>%
kable_styling(
bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = TRUE
) %>%
row_spec(0, bold = TRUE, background = "#2c3e50", color = "white")| Channel | Goal | Campaigns | Conv. Rate | Avg. ROI | Eng. Score |
|---|---|---|---|---|---|
| Product Launch | 18775 | 8.0% | 4.00 | 5.5 | |
| Brand Awareness | 18836 | 8.0% | 3.99 | 5.5 | |
| Increase Sales | 18663 | 8.0% | 3.98 | 5.5 | |
| Market Expansion | 18890 | 8.0% | 3.98 | 5.5 | |
| Product Launch | 18874 | 7.9% | 4.02 | 5.5 | |
| Brand Awareness | 18774 | 8.0% | 4.01 | 5.5 | |
| Increase Sales | 18690 | 7.9% | 4.01 | 5.5 | |
| Market Expansion | 18763 | 8.0% | 4.00 | 5.5 | |
| Brand Awareness | 18918 | 8.0% | 0.72 | 1.0 | |
| Increase Sales | 18732 | 8.0% | 0.72 | 1.0 | |
| Product Launch | 18814 | 8.0% | 0.72 | 1.0 | |
| Market Expansion | 18618 | 8.0% | 0.71 | 1.0 | |
| Product Launch | 18567 | 8.0% | 4.04 | 5.5 | |
| Increase Sales | 18878 | 8.0% | 4.01 | 5.5 | |
| Brand Awareness | 18720 | 8.0% | 3.99 | 5.5 | |
| Market Expansion | 18488 | 8.1% | 3.97 | 5.5 |
Channel with highest average ROI: Instagram
Most profitable customer segment: Technology
Campaign goal with highest conversion: Market Expansion
Key takeaways for your presentation:
Analysis automatically generated in R · 2026-04-19