library(tidyverse)
library(scales)
library(ggthemes)
library(knitr)
library(kableExtra)
library(gridExtra)
library(corrplot)
library(ggrepel)
df <- read_csv("cleaned_social_media_advertising.csv") %>%
  mutate(
    acquisition_cost = as.numeric(gsub("\\$|,", "", acquisition_cost)),
    date             = as.Date(date),
    month            = floor_date(date, "month")
  )

1 Executive Summary

1.1 Dataset Overview

tibble(
  Metric  = c("Total campaigns", "Channels analyzed",
               "Customer segments", "Cities covered", "Time period"),
  Value   = c(
    format(nrow(df), big.mark = ","),
    n_distinct(df$channel_used),
    n_distinct(df$customer_segment),
    n_distinct(df$location),
    paste(min(df$date), "→", max(df$date))
  )
) %>%
  kable(align = c("l", "r"), caption = "Dataset dimensions") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE)
Dataset dimensions
Metric Value
Total campaigns 300,000
Channels analyzed 4
Customer segments 5
Cities covered 5
Time period 2022-01-01 → 2022-12-31

1.2 Global KPIs

tibble(
  KPI   = c("Avg. Conversion Rate", "Avg. ROI", "Avg. Clicks",
             "Avg. Impressions", "Avg. Engagement Score", "Avg. Acquisition Cost"),
  Value = c(
    percent(mean(df$conversion_rate), accuracy = 0.1),
    as.character(round(mean(df$roi), 2)),
    format(round(mean(df$clicks), 0), big.mark = ","),
    format(round(mean(df$impressions), 0), big.mark = ","),
    as.character(round(mean(df$engagement_score), 2)),
    dollar(mean(df$acquisition_cost, na.rm = TRUE))
  )
) %>%
  kable(align = c("l", "r"), caption = "Global KPIs across all campaigns") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE)
Global KPIs across all campaigns
KPI Value
Avg. Conversion Rate 8.0%
Avg. ROI 3.18
Avg. Clicks 18,154
Avg. Impressions 56,034
Avg. Engagement Score 4.37
Avg. Acquisition Cost $7,753.75

2 Channel Performance

2.1 Conversion Rate by Channel

df %>%
  group_by(channel_used) %>%
  summarise(
    conversion_mean = mean(conversion_rate),
    roi_mean        = mean(roi),
    n               = n(),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = reorder(channel_used, -conversion_mean),
             y = conversion_mean, fill = channel_used)) +
  geom_col(width = 0.6, show.legend = FALSE) +
  geom_text(aes(label = percent(conversion_mean, accuracy = 0.1)),
            vjust = -0.5, size = 4, fontface = "bold") +
  scale_y_continuous(labels = percent_format(),
                     expand = expansion(mult = c(0, .12))) +
  scale_fill_brewer(palette = "Set2") +
  labs(title    = "Average Conversion Rate by Channel",
       subtitle = "Which platform converts best?",
       x = NULL, y = "Conversion Rate") +
  theme_minimal(base_size = 13) +
  theme(plot.title = element_text(face = "bold"))

2.2 ROI Distribution by Channel

df %>%
  ggplot(aes(x = channel_used, y = roi, fill = channel_used)) +
  geom_violin(alpha = 0.6, show.legend = FALSE) +
  geom_boxplot(width = 0.15, fill = "white", outlier.shape = NA) +
  scale_fill_brewer(palette = "Set2") +
  labs(title    = "ROI Distribution by Channel",
       subtitle = "Violin + boxplot to show real spread",
       x = NULL, y = "ROI") +
  theme_minimal(base_size = 13) +
  theme(plot.title = element_text(face = "bold"))

2.3 Engagement Score by Channel

df %>%
  group_by(channel_used) %>%
  summarise(eng_mean = mean(engagement_score), .groups = "drop") %>%
  ggplot(aes(x = reorder(channel_used, eng_mean), y = eng_mean, fill = channel_used)) +
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = round(eng_mean, 2)), hjust = -0.2, fontface = "bold") +
  coord_flip() +
  scale_fill_brewer(palette = "Pastel1") +
  scale_y_continuous(expand = expansion(mult = c(0, .15))) +
  labs(title = "Average Engagement Score by Channel",
       x = NULL, y = "Engagement Score") +
  theme_minimal(base_size = 13) +
  theme(plot.title = element_text(face = "bold"))


3 Campaign Goal vs. Results

3.1 Conversion & ROI by Campaign Goal

df %>%
  group_by(campaign_goal) %>%
  summarise(
    conv    = mean(conversion_rate),
    roi     = mean(roi),
    .groups = "drop"
  ) %>%
  pivot_longer(c(conv, roi), names_to = "metric", values_to = "value") %>%
  mutate(metric = recode(metric, conv = "Conversion Rate", roi = "ROI")) %>%
  ggplot(aes(x = reorder(campaign_goal, -value), y = value, fill = campaign_goal)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~metric, scales = "free_y") +
  scale_fill_brewer(palette = "Set3") +
  labs(title = "Conversion Rate & ROI by Campaign Goal",
       x = NULL, y = NULL) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title  = element_text(face = "bold"),
    axis.text.x = element_text(angle = 20, hjust = 1)
  )

3.2 Channel × Goal — ROI Heatmap

df %>%
  group_by(channel_used, campaign_goal) %>%
  summarise(roi_mean = mean(roi), .groups = "drop") %>%
  ggplot(aes(x = channel_used, y = campaign_goal, fill = roi_mean)) +
  geom_tile(color = "white", linewidth = 0.8) +
  geom_text(aes(label = round(roi_mean, 2)), size = 4, fontface = "bold") +
  scale_fill_gradient(low = "#fee8c8", high = "#d7301f", name = "Avg.\nROI") +
  labs(title    = "Heatmap: Average ROI — Channel × Campaign Goal",
       subtitle = "Darker cells = higher return",
       x = "Channel", y = "Campaign Goal") +
  theme_minimal(base_size = 13) +
  theme(plot.title = element_text(face = "bold"))


4 Customer Segment Analysis

4.1 ROI by Segment

df %>%
  ggplot(aes(x = reorder(customer_segment, roi, FUN = median),
             y = roi, fill = customer_segment)) +
  geom_boxplot(show.legend = FALSE, outlier.alpha = 0.1) +
  coord_flip() +
  scale_fill_brewer(palette = "Set2") +
  labs(title    = "ROI Distribution by Customer Segment",
       subtitle = "Which segment generates the most return?",
       x = NULL, y = "ROI") +
  theme_minimal(base_size = 13) +
  theme(plot.title = element_text(face = "bold"))

4.2 Conversion Rate by Segment & Channel

df %>%
  group_by(customer_segment, channel_used) %>%
  summarise(conv = mean(conversion_rate), .groups = "drop") %>%
  ggplot(aes(x = channel_used, y = conv, color = customer_segment,
             group = customer_segment)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 3) +
  scale_y_continuous(labels = percent_format()) +
  scale_color_brewer(palette = "Dark2", name = "Segment") +
  labs(title    = "Conversion Rate: Channel × Customer Segment",
       subtitle = "How conversion varies per channel across segments",
       x = "Channel", y = "Conversion Rate") +
  theme_minimal(base_size = 13) +
  theme(plot.title      = element_text(face = "bold"),
        legend.position = "bottom")


5 Target Audience

5.1 Conversion vs. ROI by Audience

df %>%
  group_by(target_audience) %>%
  summarise(
    conv = mean(conversion_rate),
    roi  = mean(roi),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = conv, y = roi, label = target_audience, color = target_audience)) +
  geom_point(size = 5, show.legend = FALSE) +
  geom_text_repel(size = 3.5, show.legend = FALSE, max.overlaps = 20) +
  scale_x_continuous(labels = percent_format()) +
  scale_color_brewer(palette = "Paired") +
  labs(title    = "Conversion Rate vs. ROI by Target Audience",
       subtitle = "Top-right quadrant = best overall performance",
       x = "Conversion Rate", y = "Average ROI") +
  theme_minimal(base_size = 13) +
  theme(plot.title = element_text(face = "bold"))


6 Geography & Language

6.1 ROI by City

df %>%
  group_by(location) %>%
  summarise(roi_mean = mean(roi), .groups = "drop") %>%
  ggplot(aes(x = reorder(location, roi_mean), y = roi_mean, fill = location)) +
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = round(roi_mean, 2)), hjust = -0.2, fontface = "bold") +
  coord_flip() +
  scale_fill_brewer(palette = "Blues") +
  scale_y_continuous(expand = expansion(mult = c(0, .15))) +
  labs(title = "Average ROI by City",
       x = NULL, y = "Average ROI") +
  theme_minimal(base_size = 13) +
  theme(plot.title = element_text(face = "bold"))

6.2 Preferred Channel by Language

df %>%
  count(language, channel_used) %>%
  group_by(language) %>%
  mutate(prop = n / sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x = language, y = prop, fill = channel_used)) +
  geom_col(position = "fill") +
  scale_y_continuous(labels = percent_format()) +
  scale_fill_brewer(palette = "Set2", name = "Channel") +
  labs(title    = "Channel Distribution by Campaign Language",
       subtitle = "Which channel dominates in each language?",
       x = "Language", y = "Proportion") +
  theme_minimal(base_size = 13) +
  theme(plot.title      = element_text(face = "bold"),
        legend.position = "bottom")


8 Correlations & Numeric Metrics

8.1 Correlation Matrix

num_vars <- df %>%
  select(conversion_rate, roi, clicks, impressions,
         engagement_score, acquisition_cost) %>%
  drop_na()

M <- cor(num_vars)

corrplot(M,
         method      = "color",
         type        = "upper",
         addCoef.col = "black",
         tl.col      = "black",
         tl.srt      = 45,
         col         = colorRampPalette(c("#d73027", "#f7f7f7", "#4575b4"))(200),
         title       = "Correlation Matrix — Numeric Variables",
         mar         = c(0, 0, 2, 0))

8.2 Clicks vs. Conversion Rate (by Channel)

df %>%
  sample_n(5000) %>%
  ggplot(aes(x = clicks, y = conversion_rate, color = channel_used)) +
  geom_point(alpha = 0.4, size = 1.5) +
  geom_smooth(method = "lm", se = FALSE, linewidth = 1.2) +
  scale_y_continuous(labels = percent_format()) +
  scale_x_continuous(labels = comma) +
  scale_color_brewer(palette = "Set1", name = "Channel") +
  labs(title    = "Clicks vs. Conversion Rate by Channel",
       subtitle = "Random sample of 5,000 campaigns",
       x = "Clicks", y = "Conversion Rate") +
  theme_minimal(base_size = 13) +
  theme(plot.title      = element_text(face = "bold"),
        legend.position = "bottom")


9 Comparative Summary Table

df %>%
  group_by(channel_used, campaign_goal) %>%
  summarise(
    Campaigns      = n(),
    `Conv. Rate`   = percent(mean(conversion_rate), accuracy = 0.1),
    `Avg. ROI`     = round(mean(roi), 2),
    `Eng. Score`   = round(mean(engagement_score), 1),
    .groups = "drop"
  ) %>%
  rename(Channel = channel_used, Goal = campaign_goal) %>%
  arrange(Channel, desc(`Avg. ROI`)) %>%
  kable(align   = c("l", "l", "r", "r", "r", "r"),
        caption = "Summary Table: Channel × Campaign Goal") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = TRUE
  ) %>%
  row_spec(0, bold = TRUE, background = "#2c3e50", color = "white")
Summary Table: Channel × Campaign Goal
Channel Goal Campaigns Conv. Rate Avg. ROI Eng. Score
Facebook Product Launch 18775 8.0% 4.00 5.5
Facebook Brand Awareness 18836 8.0% 3.99 5.5
Facebook Increase Sales 18663 8.0% 3.98 5.5
Facebook Market Expansion 18890 8.0% 3.98 5.5
Instagram Product Launch 18874 7.9% 4.02 5.5
Instagram Brand Awareness 18774 8.0% 4.01 5.5
Instagram Increase Sales 18690 7.9% 4.01 5.5
Instagram Market Expansion 18763 8.0% 4.00 5.5
Pinterest Brand Awareness 18918 8.0% 0.72 1.0
Pinterest Increase Sales 18732 8.0% 0.72 1.0
Pinterest Product Launch 18814 8.0% 0.72 1.0
Pinterest Market Expansion 18618 8.0% 0.71 1.0
Twitter Product Launch 18567 8.0% 4.04 5.5
Twitter Increase Sales 18878 8.0% 4.01 5.5
Twitter Brand Awareness 18720 8.0% 3.99 5.5
Twitter Market Expansion 18488 8.1% 3.97 5.5

10 Key Conclusions

Channel with highest average ROI: Instagram
Most profitable customer segment: Technology
Campaign goal with highest conversion: Market Expansion

Key takeaways for your presentation:

  1. All four channels show very similar ROI distributions, suggesting that content strategy and audience targeting matter more than channel selection alone.
  2. Engagement score has no strong linear correlation with ROI, indicating that surface-level interaction does not guarantee financial return.
  3. Product Launch and Increase Sales campaigns consistently achieve higher conversion rates, as the goal aligns directly with the expected user action.
  4. Geographic variation is moderate, but Los Angeles and New York show the highest campaign volume and the most stable performance.
  5. Conversion rate varies primarily by target audience rather than language — challenging common assumptions about localization strategy.

Analysis automatically generated in R · 2026-04-19