Executive Summary

Key Findings

  • Heritage brands (120+ years) account for roughly two-thirds of total revenue (about $194.0B of $294.5B) despite being only 3 of the 10 brands in the sample
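  • Follower count is not a positive revenue driver: total social media followers and revenue are negatively correlated in this dataset (Pearson r ≈ -0.64, as computed in Section 3)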
  • Engagement and brand age are stronger revenue predictors than follower count

1. Brand Dataset Overview

# Source data: one record per brand, written row-wise so each brand's metrics
# can be read (and audited) on a single line.
# Latest_Annual_Revenue is shown downstream as USD billions; Age is in years.
# NOTE(review): Gillette and Procter & Gamble carry near-identical revenue
# figures (82 vs 82.006) - confirm these are not the same parent-company number.
brand_data <- tibble::tribble(
  ~Brand, ~Instagram_Followers, ~Instagram_Hashtag_Count, ~TikTok_Followers,
  ~TikTok_Likes, ~Latest_Annual_Revenue, ~Age, ~Engagement_Instagram,
  # Brand, IG followers, IG hashtags, TikTok followers, TikTok likes,
  # revenue, age, IG engagement
  "L'Oreal", 3630100, 14900000, 2170000, 905400, 44.74, 115, 90,
  "Procter & Gamble", 160770, 250000, 10000, 35100, 82.006, 187, 87,
  "Gillette", 1625800, 420000, 370000, 60600, 82, 123, 85,
  "Natura", 3390290, 5400000, 310000, 1700000, 5.36, 55, 79,
  "Nivea", 2291300, 2400000, 1620000, 1500000, 8.47, 113, 79,
  "Maybelline", 3539000, 12900000, 8950000, 613800, 2.6, 109, 79,
  "Benefit", 996500, 11800000, 14200000, 1300000, 1.5, 48, 78,
  "Dove", 4337100, 48900000, 2400000, 229300, 8.438, 67, 78,
  "M.A.C", 321720, 24800000, 2050000, 933700, 29.4, 40, 77,
  "Guerlain", 614600, 1900000, 1600000, 335700, 30, 196, 76
)

# Two-level heritage flag, added before the other derived columns:
# brands aged 120 years or more are "Heritage", everything younger is "Modern".
brand_data$Heritage_Status <- ifelse(
  brand_data$Age >= 120,
  "Heritage (120+ years)",
  "Modern (<120 years)"
)

# Feature engineering, split into one step per family of derived columns.
# Columns are created in the same order as before, so the resulting tibble
# layout is unchanged.

# Step 1 - audience totals and revenue efficiency
brand_data <- mutate(
  brand_data,
  Total_Social_Media = Instagram_Followers + TikTok_Followers,
  Total_Engagement_Signals = Instagram_Hashtag_Count + TikTok_Likes,
  Revenue_Billion = Latest_Annual_Revenue,
  # Revenue is held in billions; scale to dollars before dividing by followers
  Revenue_Per_Follower = (Revenue_Billion * 1e9) / Total_Social_Media
)

# Step 2 - four-band age category as a factor ordered oldest to youngest
brand_data <- mutate(
  brand_data,
  Age_Category = factor(
    case_when(
      Age >= 120 ~ "Heritage (120+ years)",
      Age >= 100 ~ "Established (100-119 years)",
      Age >= 50 ~ "Mature (50-99 years)",
      TRUE ~ "Emerging (<50 years)"
    ),
    levels = c(
      "Heritage (120+ years)",
      "Established (100-119 years)",
      "Mature (50-99 years)",
      "Emerging (<50 years)"
    )
  )
)

# Step 3 - per-platform engagement ratios (as percentages) and their mean
brand_data <- mutate(
  brand_data,
  Instagram_Engagement_Rate = (Instagram_Hashtag_Count / Instagram_Followers) * 100,
  TikTok_Engagement_Rate = (TikTok_Likes / TikTok_Followers) * 100,
  Combined_Engagement_Score = (Instagram_Engagement_Rate + TikTok_Engagement_Rate) / 2
)

# Step 4 - share of the combined audience held on each platform;
# an exact 50/50 split is labelled "TikTok"
brand_data <- mutate(
  brand_data,
  Instagram_Dominance = Instagram_Followers / Total_Social_Media * 100,
  TikTok_Dominance = TikTok_Followers / Total_Social_Media * 100,
  Primary_Platform = ifelse(Instagram_Dominance > 50, "Instagram", "TikTok")
)

# Step 5 - quartile ranks and the 2x2 performance segment.
# Quartiles 3-4 count as "high" on each axis.
brand_data <- mutate(
  brand_data,
  Social_Media_Quartile = ntile(Total_Social_Media, 4),
  Revenue_Quartile = ntile(Revenue_Billion, 4),
  Performance_Segment = case_when(
    Social_Media_Quartile > 2 & Revenue_Quartile > 2 ~ "Market Leaders",
    Social_Media_Quartile > 2 & Revenue_Quartile <= 2 ~ "Social Dominant",
    Social_Media_Quartile <= 2 & Revenue_Quartile > 2 ~ "Revenue Dominant",
    TRUE ~ "Emerging Players"
  )
)

# Interactive, sortable table of the raw brand metrics.
# Formatters below refer to the original column names, not the display headers.
brand_table <- select(
  brand_data,
  Brand, Instagram_Followers, Instagram_Hashtag_Count, TikTok_Followers,
  TikTok_Likes, Latest_Annual_Revenue, Age, Heritage_Status
)

# Raw count columns that are shown with thousands separators and no decimals
brand_table_count_cols <- c(
  "Instagram_Followers", "Instagram_Hashtag_Count",
  "TikTok_Followers", "TikTok_Likes"
)

datatable(
  brand_table,
  rownames = FALSE,
  colnames = c(
    "Brand", "Instagram Followers", "Instagram Hashtags",
    "TikTok Followers", "TikTok Likes", "Revenue ($B)",
    "Age (Years)", "Brand Category"
  ),
  options = list(
    pageLength = 10,
    dom = "t",
    ordering = TRUE,
    # Centre every column except the first (Brand)
    columnDefs = list(
      list(className = "dt-center", targets = 1:7)
    )
  )
) %>%
  # Colour-code the heritage flag with the same palette used in the charts
  formatStyle(
    "Heritage_Status",
    backgroundColor = styleEqual(
      c("Heritage (120+ years)", "Modern (<120 years)"),
      c("#E74C3C", "#3498DB")
    ),
    color = "white",
    fontWeight = "bold"
  ) %>%
  formatCurrency(
    "Latest_Annual_Revenue",
    currency = "$", digits = 2, before = TRUE, mark = ","
  ) %>%
  formatRound(brand_table_count_cols, digits = 0, mark = ",")

Dataset Summary Statistics

  • Total Brands: 10 | Heritage Brands: 3 | Combined Followers: 54,587,180 | Total Revenue: $294.514 Billion

2. Digital Presence and Revenue Performance

# Side-by-side horizontal bar charts: combined follower count (left) and
# annual revenue (right), both coloured by heritage status.

# Settings shared by both panels
heritage_fill_values <- c(
  "Heritage (120+ years)" = "#E74C3C",
  "Modern (<120 years)" = "#3498DB"
)
# No padding at zero, 15% headroom at the top so bar labels are not clipped
bar_axis_expansion <- expansion(mult = c(0, 0.15))

# Left panel: total followers in millions, largest at the top
p1 <- ggplot(
  arrange(brand_data, desc(Total_Social_Media)),
  aes(x = reorder(Brand, Total_Social_Media), y = Total_Social_Media / 1e6)
) +
  geom_col(aes(fill = Heritage_Status), width = 0.7) +
  geom_text(
    aes(label = sprintf("%.1fM", Total_Social_Media / 1e6)),
    hjust = -0.1,
    size = 3,
    fontface = "bold"
  ) +
  coord_flip() +
  scale_fill_manual(values = heritage_fill_values, name = "Brand Category") +
  scale_y_continuous(expand = bar_axis_expansion) +
  labs(
    title = "Social Media Presence",
    subtitle = "Combined Instagram + TikTok (Millions)",
    x = NULL,
    y = "Total Followers (Millions)"
  ) +
  theme(legend.position = "none")

# Right panel: revenue in USD billions, largest at the top
p2 <- ggplot(
  arrange(brand_data, desc(Revenue_Billion)),
  aes(x = reorder(Brand, Revenue_Billion), y = Revenue_Billion)
) +
  geom_col(aes(fill = Heritage_Status), width = 0.7) +
  geom_text(
    aes(label = sprintf("$%.1fB", Revenue_Billion)),
    hjust = -0.1,
    size = 3,
    fontface = "bold"
  ) +
  coord_flip() +
  scale_fill_manual(values = heritage_fill_values, name = "Brand Category") +
  scale_y_continuous(
    expand = bar_axis_expansion,
    labels = dollar_format(prefix = "$", suffix = "B")
  ) +
  labs(
    title = "Annual Revenue",
    subtitle = "Latest Fiscal Year (USD Billions)",
    x = NULL,
    y = "Revenue ($ Billions)"
  ) +
  theme(legend.position = "bottom")

# Compose the panels; `&` applies the legend position to every panel so the
# collected legend ends up once, at the bottom
(p1 + p2 + plot_layout(ncol = 2, guides = "collect")) &
  theme(legend.position = "bottom")


3. Revenue Correlation Analysis

# Scatter plots of revenue against (left) total followers and (right) brand age.

# Pearson correlation between total followers and revenue, computed once so the
# wording in the subtitle is derived from the number it sits next to instead of
# being hard-coded.
followers_revenue_r <- cor(
  brand_data$Total_Social_Media,
  brand_data$Revenue_Billion
)
# Rule-of-thumb bands on |r|: < 0.3 weak, 0.3 to < 0.7 moderate, >= 0.7 strong
followers_revenue_strength <- as.character(cut(
  abs(followers_revenue_r),
  breaks = c(-Inf, 0.3, 0.7, Inf),
  labels = c("Weak", "Moderate", "Strong"),
  right = FALSE
))
followers_revenue_direction <- if (isTRUE(followers_revenue_r < 0)) {
  "Negative"
} else {
  "Positive"
}

# Social Media vs Revenue (linear fit)
p1 <- ggplot(brand_data, aes(x = Total_Social_Media / 1e6, y = Revenue_Billion)) +
  geom_smooth(
    method = "lm", se = TRUE, color = "steelblue",
    alpha = 0.2, linetype = "dashed"
  ) +
  geom_point(aes(color = Heritage_Status), size = 6, alpha = 0.7) +
  geom_label_repel(
    aes(label = Brand),
    size = 2.5, box.padding = 0.35, segment.color = "gray50"
  ) +
  scale_color_manual(
    values = c("Heritage (120+ years)" = "#E74C3C", "Modern (<120 years)" = "#3498DB"),
    name = "Brand Category"
  ) +
  labs(
    title = "Social Media Followers vs. Revenue",
    subtitle = sprintf(
      "Pearson r = %.3f (%s %s Correlation)",
      followers_revenue_r,
      followers_revenue_strength,
      followers_revenue_direction
    ),
    x = "Total Followers (Millions)",
    y = "Revenue ($ Billions)"
  ) +
  scale_x_continuous(labels = comma) +
  scale_y_continuous(labels = dollar_format(prefix = "$", suffix = "B")) +
  theme(legend.position = "none")

# Brand Age vs Revenue (loess smoother)
p2 <- ggplot(brand_data, aes(x = Age, y = Revenue_Billion)) +
  geom_smooth(method = "loess", se = TRUE, color = "darkblue", alpha = 0.2) +
  geom_point(aes(color = Heritage_Status), size = 6, alpha = 0.7) +
  geom_label_repel(aes(label = Brand), size = 2.5, box.padding = 0.35) +
  scale_color_manual(
    values = c("Heritage (120+ years)" = "#E74C3C", "Modern (<120 years)" = "#3498DB"),
    name = "Brand Category"
  ) +
  labs(
    title = "Brand Age vs. Revenue",
    subtitle = "Heritage Brands Demonstrate Revenue Advantage",
    x = "Brand Age (Years)",
    y = "Revenue ($ Billions)"
  ) +
  scale_y_continuous(labels = dollar_format(prefix = "$", suffix = "B")) +
  theme(legend.position = "bottom")

# Combine plots; `&` applies the legend position to both panels
p1 + p2 + plot_layout(ncol = 2, guides = "collect") & theme(legend.position = "bottom")

Heritage Brand Performance Analysis

# Average revenue and follower count for heritage vs. modern brands.
# The grouping reuses the Heritage_Status column already on brand_data rather
# than re-deriving the 120-year cut-off here, so the threshold lives in one
# place. The grouping column keeps its previous name, Heritage_Brand.
heritage_stats <- brand_data %>%
  group_by(Heritage_Brand = Heritage_Status) %>%
  summarise(
    Brands = n(),
    Avg_Revenue = mean(Revenue_Billion),
    Avg_Social_Media = mean(Total_Social_Media)
  )

# Render as a styled HTML table: revenue to 2 decimals, followers as whole
# numbers with thousands separators
heritage_stats %>%
  kable(
    format = "html",
    col.names = c("Brand Category", "Number of Brands", "Average Revenue ($B)", "Average Followers"),
    digits = c(0, 0, 2, 0),
    format.args = list(big.mark = ",")
  ) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
Brand Category Number of Brands Average Revenue ($B) Average Followers
Heritage (120+ years) 3 64.67 1,460,390
Modern (<120 years) 7 14.36 7,172,287

Heritage brands generate 350% higher average revenue than modern brands


4. Performance Segmentation and Revenue Predictors

4.1 Market Positioning Matrix

# Collapse the four performance segments into two groups: the high-revenue
# segments vs. the rest. Any segment value not listed maps to NA.
brand_data <- mutate(
  brand_data,
  Merged_Segment = unname(
    c(
      "Market Leaders" = "Leaders/Revenue Dominant",
      "Revenue Dominant" = "Leaders/Revenue Dominant",
      "Emerging Players" = "Emerging/Social Dominant",
      "Social Dominant" = "Emerging/Social Dominant"
    )[Performance_Segment]
  )
)

# Per-group brand count and averages, highest average revenue first
brands_by_merged_segment <- group_by(brand_data, Merged_Segment)
performance_summary <- summarise(
  brands_by_merged_segment,
  Brands = n(),
  Avg_Revenue = mean(Revenue_Billion),
  Avg_Social_Media = mean(Total_Social_Media),
  Avg_Engagement = mean(Combined_Engagement_Score),
  Avg_Age = mean(Age)
)
performance_summary <- arrange(performance_summary, desc(Avg_Revenue))

# Styled HTML summary table with a dark header row
kable(
  performance_summary,
  format = "html",
  col.names = c(
    "Market Segment", "Brands", "Avg. Revenue ($B)",
    "Avg. Followers", "Avg. Engagement", "Avg. Age"
  ),
  digits = c(0, 0, 2, 0, 1, 0),
  format.args = list(big.mark = ",")
) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#2c3e50", color = "white")
Market Segment Brands Avg. Revenue ($B) Avg. Followers Avg. Engagement Avg. Age
Leaders/Revenue Dominant 4 59.69 2,545,318 166.4 155
Emerging/Social Dominant 6 9.29 7,400,985 946.7 72

4.2 Revenue Driver Analysis

# Reshape the segment summary to long form: one row per (segment, metric),
# keeping Avg_Revenue alongside so it can be plotted on the y axis.
# NOTE(review): the "(Weak Predictor)" / "(Strong Predictor)" suffixes below,
# and the matching subtitle and caption wording, are hard-coded rather than
# derived from the data - confirm they still agree with the summary table.
viz_data <- performance_summary %>%
  select(Merged_Segment, Avg_Revenue, Avg_Social_Media, Avg_Engagement, Avg_Age) %>%
  pivot_longer(
    cols = c(Avg_Social_Media, Avg_Engagement, Avg_Age),
    names_to = "Metric_Type",
    values_to = "Metric_Value"
  ) %>%
  mutate(
    Metric_Label = recode(
      Metric_Type,
      Avg_Social_Media = "Average Followers\n(Weak Predictor)",
      Avg_Engagement = "Average Engagement\n(Strong Predictor)",
      Avg_Age = "Average Brand Age\n(Strong Predictor)"
    ),
    # Fix the facet order: followers, engagement, age
    Metric_Label = factor(
      Metric_Label,
      levels = c(
        "Average Followers\n(Weak Predictor)",
        "Average Engagement\n(Strong Predictor)",
        "Average Brand Age\n(Strong Predictor)"
      )
    )
  )

# Faceted scatter plot: average revenue against each metric, one panel per
# metric with its own x scale.
# NOTE(review): each panel holds only two points (one per merged segment), so
# the dashed lm line simply connects them and carries no goodness-of-fit
# information.
ggplot(viz_data, aes(x = Metric_Value, y = Avg_Revenue)) +
  # `linewidth` replaces `size` for line geoms (deprecated in ggplot2 3.4.0)
  geom_smooth(
    method = "lm", se = FALSE, color = "gray60",
    linetype = "dashed", linewidth = 0.8
  ) +
  geom_point(aes(color = Merged_Segment), size = 8, alpha = 0.85) +
  geom_text(
    aes(label = Merged_Segment),
    size = 3,
    fontface = "bold",
    vjust = 2.5,
    color = "gray20"
  ) +
  facet_wrap(~ Metric_Label, scales = "free_x", nrow = 1) +
  scale_color_manual(
    values = c(
      "Leaders/Revenue Dominant" = "#27AE60",
      "Emerging/Social Dominant" = "#E74C3C"
    ),
    name = "Performance Segment"
  ) +
  scale_y_continuous(labels = dollar_format(prefix = "$", suffix = "B")) +
  labs(
    title = "Comparative Analysis: Revenue Predictors",
    subtitle = "Brand age and engagement demonstrate strong positive relationships with revenue, while follower count shows weak correlation",
    x = "\nMetric Value",
    y = "Average Revenue ($B)\n",
    caption = "Note: High-revenue brands consistently exhibit elevated brand age and engagement metrics, but not necessarily high follower counts.\nStrategic implication: Investment prioritization should favor engagement optimization and heritage brand positioning over follower acquisition."
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 18, face = "bold", hjust = 0.5, margin = margin(b = 8)),
    plot.subtitle = element_text(size = 13, hjust = 0.5, color = "gray30", margin = margin(b = 15)),
    plot.caption = element_text(size = 11, color = "gray40", hjust = 0, margin = margin(t = 15)),
    axis.title = element_text(size = 13, face = "bold"),
    axis.text = element_text(size = 10, face = "bold"),
    legend.position = "top",
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 11),
    strip.text = element_text(size = 12, face = "bold", color = "white"),
    strip.background = element_rect(fill = "gray30", color = NA),
    panel.grid.major = element_line(color = "gray90"),
    panel.grid.minor = element_blank(),
    plot.background = element_rect(fill = "white", color = NA),
    plot.margin = margin(20, 20, 20, 20)
  )

4.3 Statistical Evidence

# Pearson correlation of each candidate predictor with revenue, computed on the
# brand-level rows of brand_data.
# The two-row segment summary cannot be used here: the correlation of any two
# points is always exactly +1 or -1, so it cannot rank predictors.
revenue_correlations <- c(
  "Total Followers" = cor(
    brand_data$Total_Social_Media, brand_data$Revenue_Billion
  ),
  "Combined Engagement Score" = cor(
    brand_data$Combined_Engagement_Score, brand_data$Revenue_Billion
  ),
  "Brand Age" = cor(
    brand_data$Age, brand_data$Revenue_Billion
  )
)

correlations <- data.frame(
  Metric = names(revenue_correlations),
  Correlation_with_Revenue = unname(revenue_correlations),
  # Strength is derived from |r| instead of being typed in by hand.
  # Rule-of-thumb bands: < 0.3 weak, 0.3 to < 0.7 moderate, >= 0.7 strong.
  Strength = as.character(cut(
    abs(unname(revenue_correlations)),
    breaks = c(-Inf, 0.3, 0.7, Inf),
    labels = c("Weak", "Moderate", "Strong"),
    right = FALSE
  ))
)

# Row background keyed by strength band
strength_background <- c(
  Weak = "#fff3cd",
  Moderate = "#d1ecf1",
  Strong = "#d4edda"
)

correlation_table <- correlations %>%
  mutate(Correlation_with_Revenue = round(Correlation_with_Revenue, 3)) %>%
  kable(
    format = "html",
    col.names = c("Metric", "Correlation (r)", "Strength"),
    align = c("l", "c", "c")
  ) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE, background = "#34495e", color = "white")

# Colour each data row by its own strength band; rows whose correlation could
# not be computed (NA) are left unstyled
for (i in seq_len(nrow(correlations))) {
  row_strength <- correlations$Strength[i]
  if (!is.na(row_strength)) {
    correlation_table <- row_spec(
      correlation_table, i,
      background = strength_background[[row_strength]]
    )
  }
}

correlation_table
Metric Correlation (r) Strength
Average Followers -1 Weak
Average Engagement -1 Strong
Average Brand Age 1 Strong

Correlation values closer to ±1 indicate stronger relationships


5. Strategic Recommendations

Key Insights

  1. Heritage brands demonstrate superior revenue performance independent of social media metrics
  2. Engagement quality is a stronger revenue predictor than follower quantity
  3. Platform-specific strategies outperform uniform cross-platform approaches

Recommendations

  • Prioritize engagement-focused strategies over follower acquisition
  • Leverage brand heritage and storytelling to build consumer trust
  • Develop platform-specific content aligned with audience demographics
  • Focus on community building for sustainable growth