# Raw per-brand inputs, one row per brand.
# Latest_Annual_Revenue is in USD billions and Age is in years (see the table
# labels used further down). Engagement_Instagram is not referenced by any of
# the code visible in this section -- TODO confirm whether it is still needed.
brand_data <- tibble::tribble(
  ~Brand,             ~Instagram_Followers, ~Instagram_Hashtag_Count, ~TikTok_Followers, ~TikTok_Likes, ~Latest_Annual_Revenue, ~Age, ~Engagement_Instagram,
  "L'Oreal",          3630100,              14900000,                 2170000,           905400,        44.74,                  115,  90,
  "Procter & Gamble", 160770,               250000,                   10000,             35100,         82.006,                 187,  87,
  "Gillette",         1625800,              420000,                   370000,            60600,         82,                     123,  85,
  "Natura",           3390290,              5400000,                  310000,            1700000,       5.36,                   55,   79,
  "Nivea",            2291300,              2400000,                  1620000,           1500000,       8.47,                   113,  79,
  "Maybelline",       3539000,              12900000,                 8950000,           613800,        2.6,                    109,  79,
  "Benefit",          996500,               11800000,                 14200000,          1300000,       1.5,                    48,   78,
  "Dove",             4337100,              48900000,                 2400000,           229300,        8.438,                  67,   78,
  "M.A.C",            321720,               24800000,                 2050000,           933700,        29.4,                   40,   77,
  "Guerlain",         614600,               1900000,                  1600000,           335700,        30,                     196,  76
)
# Two-way age split used for colours and legends in the plots below:
# brands younger than 120 years are "Modern", everything else is "Heritage".
brand_data <- mutate(
  brand_data,
  Heritage_Status = ifelse(
    Age < 120,
    "Modern (<120 years)",
    "Heritage (120+ years)"
  )
)
# Derived metrics, appended to brand_data one theme at a time.
# Later steps read columns created by earlier ones, so the step order matters.
brand_data <- brand_data %>%
  # Reach and activity totals across both platforms
  mutate(
    Total_Social_Media = Instagram_Followers + TikTok_Followers,
    Total_Engagement_Signals = Instagram_Hashtag_Count + TikTok_Likes
  ) %>%
  # Revenue is stored in billions; convert to dollars for the per-follower ratio
  mutate(
    Revenue_Billion = Latest_Annual_Revenue,
    Revenue_Per_Follower = (Revenue_Billion * 1e9) / Total_Social_Media
  ) %>%
  # Four-band age classification, stored as a factor ordered oldest to youngest
  mutate(
    Age_Category = factor(
      case_when(
        Age >= 120 ~ "Heritage (120+ years)",
        Age >= 100 ~ "Established (100-119 years)",
        Age >= 50 ~ "Mature (50-99 years)",
        TRUE ~ "Emerging (<50 years)"
      ),
      levels = c(
        "Heritage (120+ years)",
        "Established (100-119 years)",
        "Mature (50-99 years)",
        "Emerging (<50 years)"
      )
    )
  ) %>%
  # Activity per follower on each platform (as a percentage) and their mean
  mutate(
    Instagram_Engagement_Rate =
      (Instagram_Hashtag_Count / Instagram_Followers) * 100,
    TikTok_Engagement_Rate = (TikTok_Likes / TikTok_Followers) * 100,
    Combined_Engagement_Score =
      (Instagram_Engagement_Rate + TikTok_Engagement_Rate) / 2
  ) %>%
  # Share of total followers held on each platform
  mutate(
    Instagram_Dominance = Instagram_Followers / Total_Social_Media * 100,
    TikTok_Dominance = TikTok_Followers / Total_Social_Media * 100,
    Primary_Platform = ifelse(Instagram_Dominance > 50, "Instagram", "TikTok")
  ) %>%
  # Quartile ranks; the upper two quartiles count as "high" on each axis
  mutate(
    Social_Media_Quartile = ntile(Total_Social_Media, 4),
    Revenue_Quartile = ntile(Revenue_Billion, 4),
    Performance_Segment = case_when(
      Social_Media_Quartile >= 3 & Revenue_Quartile >= 3 ~ "Market Leaders",
      Social_Media_Quartile >= 3 & Revenue_Quartile < 3 ~ "Social Dominant",
      Social_Media_Quartile < 3 & Revenue_Quartile >= 3 ~ "Revenue Dominant",
      TRUE ~ "Emerging Players"
    )
  )
# Interactive, sortable table of the raw inputs plus the heritage flag.
brand_data %>%
  select(
    Brand,
    Instagram_Followers,
    Instagram_Hashtag_Count,
    TikTok_Followers,
    TikTok_Likes,
    Latest_Annual_Revenue,
    Age,
    Heritage_Status
  ) %>%
  datatable(
    rownames = FALSE,
    # Display headers, in the same order as the selected columns
    colnames = c(
      "Brand", "Instagram Followers", "Instagram Hashtags",
      "TikTok Followers", "TikTok Likes", "Revenue ($B)",
      "Age (Years)", "Brand Category"
    ),
    options = list(
      # "t" renders the table only, without search box or paging controls
      dom = 't',
      pageLength = 10,
      ordering = TRUE,
      # Column indices are zero-based on the DataTables side, so 1:7 centres
      # every column after Brand
      columnDefs = list(
        list(targets = 1:7, className = 'dt-center')
      )
    )
  ) %>%
  # Colour the category cell with the same palette the plots use
  formatStyle(
    'Heritage_Status',
    fontWeight = 'bold',
    color = 'white',
    backgroundColor = styleEqual(
      c('Heritage (120+ years)', 'Modern (<120 years)'),
      c('#E74C3C', '#3498DB')
    )
  ) %>%
  formatCurrency(
    'Latest_Annual_Revenue',
    currency = "$",
    digits = 2,
    before = TRUE,
    mark = ","
  ) %>%
  formatRound(
    c(
      'Instagram_Followers', 'Instagram_Hashtag_Count',
      'TikTok_Followers', 'TikTok_Likes'
    ),
    digits = 0,
    mark = ","
  )
# Social media footprint: horizontal bars of combined followers (in millions),
# ordered so the largest brand ends up at the top after the axis flip.
p1 <- ggplot(
  arrange(brand_data, desc(Total_Social_Media)),
  aes(x = reorder(Brand, Total_Social_Media), y = Total_Social_Media / 1e6)
) +
  geom_col(aes(fill = Heritage_Status), width = 0.7) +
  # Value labels sit just past the end of each bar
  geom_text(
    aes(label = sprintf("%.1fM", Total_Social_Media / 1e6)),
    fontface = "bold",
    size = 3,
    hjust = -0.1
  ) +
  coord_flip() +
  # Extra headroom at the upper end so the value labels are not clipped
  scale_y_continuous(expand = expansion(mult = c(0, 0.15))) +
  scale_fill_manual(
    name = "Brand Category",
    values = c(
      "Heritage (120+ years)" = "#E74C3C",
      "Modern (<120 years)" = "#3498DB"
    )
  ) +
  labs(
    title = "Social Media Presence",
    subtitle = "Combined Instagram + TikTok (Millions)",
    x = NULL,
    y = "Total Followers (Millions)"
  ) +
  theme(legend.position = "none")
# Revenue distribution: horizontal bars of annual revenue (USD billions),
# ordered so the highest-revenue brand ends up at the top after the axis flip.
p2 <- ggplot(
  arrange(brand_data, desc(Revenue_Billion)),
  aes(x = reorder(Brand, Revenue_Billion), y = Revenue_Billion)
) +
  geom_col(aes(fill = Heritage_Status), width = 0.7) +
  # Value labels sit just past the end of each bar
  geom_text(
    aes(label = sprintf("$%.1fB", Revenue_Billion)),
    fontface = "bold",
    size = 3,
    hjust = -0.1
  ) +
  coord_flip() +
  # Extra headroom at the upper end so the value labels are not clipped
  scale_y_continuous(
    labels = dollar_format(prefix = "$", suffix = "B"),
    expand = expansion(mult = c(0, 0.15))
  ) +
  scale_fill_manual(
    name = "Brand Category",
    values = c(
      "Heritage (120+ years)" = "#E74C3C",
      "Modern (<120 years)" = "#3498DB"
    )
  ) +
  labs(
    title = "Annual Revenue",
    subtitle = "Latest Fiscal Year (USD Billions)",
    x = NULL,
    y = "Revenue ($ Billions)"
  ) +
  theme(legend.position = "bottom")
# Place the two bar charts side by side with one shared legend underneath.
# `&` applies the theme to every panel, overriding each plot's own
# legend.position setting.
wrap_plots(p1, p2, ncol = 2, guides = "collect") &
  theme(legend.position = "bottom")
# Social Media vs Revenue
# The correlation is computed once so that the subtitle's wording is derived
# from the value rather than asserted independently of it.
followers_revenue_r <- cor(
  brand_data$Total_Social_Media,
  brand_data$Revenue_Billion
)
# |r| bands (< 0.3 weak, < 0.7 moderate, otherwise strong) are a rule of thumb
# for wording only; they are not a significance test.
followers_revenue_label <- if (is.na(followers_revenue_r)) {
  "Correlation Undefined"
} else {
  paste(
    as.character(cut(
      abs(followers_revenue_r),
      breaks = c(-Inf, 0.3, 0.7, Inf),
      labels = c("Weak", "Moderate", "Strong"),
      right = FALSE
    )),
    if (followers_revenue_r < 0) "Negative" else "Positive",
    "Correlation"
  )
}
p1 <- ggplot(brand_data, aes(x = Total_Social_Media / 1e6, y = Revenue_Billion)) +
  # Linear trend with confidence band; the explicit formula avoids the
  # "using formula" message that geom_smooth() prints otherwise
  geom_smooth(
    method = "lm",
    formula = y ~ x,
    se = TRUE,
    color = "steelblue",
    alpha = 0.2,
    linetype = "dashed"
  ) +
  geom_point(aes(color = Heritage_Status), size = 6, alpha = 0.7) +
  geom_label_repel(
    aes(label = Brand),
    size = 2.5,
    box.padding = 0.35,
    segment.color = "gray50"
  ) +
  scale_color_manual(
    values = c(
      "Heritage (120+ years)" = "#E74C3C",
      "Modern (<120 years)" = "#3498DB"
    ),
    name = "Brand Category"
  ) +
  labs(
    title = "Social Media Followers vs. Revenue",
    subtitle = sprintf(
      "Pearson r = %.3f (%s)",
      followers_revenue_r,
      followers_revenue_label
    ),
    x = "Total Followers (Millions)",
    y = "Revenue ($ Billions)"
  ) +
  scale_x_continuous(labels = comma) +
  scale_y_continuous(labels = dollar_format(prefix = "$", suffix = "B")) +
  theme(legend.position = "none")
# Brand age vs revenue: scatter with a LOESS trend and its confidence band.
# Colour is mapped on the point layer only, so the smoother is fitted to all
# brands together rather than once per category.
p2 <- ggplot(data = brand_data, mapping = aes(x = Age, y = Revenue_Billion)) +
  geom_smooth(method = "loess", se = TRUE, color = "darkblue", alpha = 0.2) +
  geom_point(aes(color = Heritage_Status), size = 6, alpha = 0.7) +
  geom_label_repel(aes(label = Brand), size = 2.5, box.padding = 0.35) +
  labs(
    title = "Brand Age vs. Revenue",
    subtitle = "Heritage Brands Demonstrate Revenue Advantage",
    x = "Brand Age (Years)",
    y = "Revenue ($ Billions)"
  ) +
  scale_y_continuous(labels = dollar_format(prefix = "$", suffix = "B")) +
  scale_color_manual(
    name = "Brand Category",
    values = c(
      "Heritage (120+ years)" = "#E74C3C",
      "Modern (<120 years)" = "#3498DB"
    )
  ) +
  theme(legend.position = "bottom")
# Place the two scatter plots side by side with one shared legend underneath.
# `&` applies the theme to every panel, overriding each plot's own
# legend.position setting.
wrap_plots(p1, p2, ncol = 2, guides = "collect") &
  theme(legend.position = "bottom")
# Brand count, mean revenue and mean follower total for heritage vs. modern
# brands. Groups on the Heritage_Status column already present in brand_data
# instead of re-deriving the flag, so the 120-year cut-off is not repeated
# here; the grouping column keeps the name Heritage_Brand.
heritage_stats <- brand_data %>%
  group_by(Heritage_Brand = Heritage_Status) %>%
  summarise(
    Brands = n(),
    Avg_Revenue = mean(Revenue_Billion),
    Avg_Social_Media = mean(Total_Social_Media)
  )
# Render the heritage comparison as a striped HTML table.
# `digits` is per column: revenue keeps two decimals, followers are rounded.
kable_styling(
  kable(
    heritage_stats,
    format = "html",
    col.names = c(
      "Brand Category",
      "Number of Brands",
      "Average Revenue ($B)",
      "Average Followers"
    ),
    digits = c(0, 0, 2, 0),
    format.args = list(big.mark = ",")
  ),
  bootstrap_options = c("striped", "hover"),
  full_width = FALSE
)
| Brand Category | Number of Brands | Average Revenue ($B) | Average Followers |
|---|---|---|---|
| Heritage (120+ years) | 3 | 64.67 | 1,460,390 |
| Modern (<120 years) | 7 | 14.36 | 7,172,287 |
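Heritage brands generate roughly 350% higher average revenue than modern brands ($64.67B vs. $14.36B), despite having far fewer followers on average (about 1.5M vs. 7.2M).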
# Collapse the four performance segments into two groups.
# There is deliberately no fallback branch: a segment outside these four
# names yields NA rather than being assigned to either group.
brand_data <- mutate(
  brand_data,
  Merged_Segment = case_when(
    Performance_Segment %in% c("Emerging Players", "Social Dominant") ~
      "Emerging/Social Dominant",
    Performance_Segment %in% c("Market Leaders", "Revenue Dominant") ~
      "Leaders/Revenue Dominant"
  )
)
# One row per merged segment: brand count plus the mean of each headline
# metric, with the highest-revenue segment listed first.
performance_summary <- arrange(
  summarise(
    group_by(brand_data, Merged_Segment),
    Brands = n(),
    Avg_Revenue = mean(Revenue_Billion),
    Avg_Social_Media = mean(Total_Social_Media),
    Avg_Engagement = mean(Combined_Engagement_Score),
    Avg_Age = mean(Age)
  ),
  desc(Avg_Revenue)
)
# Render the segment summary as a striped HTML table with a dark header row.
# `digits` is per column, in the same order as `col.names`.
row_spec(
  kable_styling(
    kable(
      performance_summary,
      format = "html",
      col.names = c(
        "Market Segment", "Brands", "Avg. Revenue ($B)",
        "Avg. Followers", "Avg. Engagement", "Avg. Age"
      ),
      digits = c(0, 0, 2, 0, 1, 0),
      format.args = list(big.mark = ",")
    ),
    bootstrap_options = c("striped", "hover"),
    full_width = FALSE
  ),
  # Row 0 is the header row
  0,
  bold = TRUE,
  background = "#2c3e50",
  color = "white"
)
| Market Segment | Brands | Avg. Revenue ($B) | Avg. Followers | Avg. Engagement | Avg. Age |
|---|---|---|---|---|---|
| Leaders/Revenue Dominant | 4 | 59.69 | 2,545,318 | 166.4 | 155 |
| Emerging/Social Dominant | 6 | 9.29 | 7,400,985 | 946.7 | 72 |
# Reshape the segment summary to long form: one row per segment and metric,
# with average revenue repeated alongside each metric value.
viz_data <- performance_summary %>%
  select(
    Merged_Segment,
    Avg_Revenue,
    Avg_Social_Media,
    Avg_Engagement,
    Avg_Age
  ) %>%
  pivot_longer(
    cols = c(Avg_Social_Media, Avg_Engagement, Avg_Age),
    names_to = "Metric_Type",
    values_to = "Metric_Value"
  ) %>%
  mutate(
    # Map each column name to its facet title; the order of `levels` fixes
    # the left-to-right order of the facets
    Metric_Label = factor(
      Metric_Type,
      levels = c("Avg_Social_Media", "Avg_Engagement", "Avg_Age"),
      labels = c(
        "Average Followers\n(Weak Predictor)",
        "Average Engagement\n(Strong Predictor)",
        "Average Brand Age\n(Strong Predictor)"
      )
    )
  )
# Faceted scatter of segment averages: one panel per metric, average revenue
# on the shared y axis.
# The subtitle and caption describe what the segment averages show. The
# previous wording claimed a positive engagement-revenue relationship, which
# the summary contradicts: the high-revenue segment has lower average
# engagement and fewer followers, and is only higher on brand age.
ggplot(viz_data, aes(x = Metric_Value, y = Avg_Revenue)) +
  # Each panel has just two points, so this line only joins them and shows
  # direction. NOTE: on ggplot2 >= 3.4 `size` for lines is deprecated in
  # favour of `linewidth`; kept as-is because the installed version is not
  # visible from here.
  geom_smooth(
    method = "lm",
    formula = y ~ x,
    se = FALSE,
    color = "gray60",
    linetype = "dashed",
    size = 0.8
  ) +
  geom_point(aes(color = Merged_Segment), size = 8, alpha = 0.85) +
  geom_text(
    aes(label = Merged_Segment),
    size = 3,
    fontface = "bold",
    vjust = 2.5,
    color = "gray20"
  ) +
  # Metrics are on very different scales, so each panel gets its own x axis
  facet_wrap(~ Metric_Label, scales = "free_x", nrow = 1) +
  scale_color_manual(
    values = c(
      "Leaders/Revenue Dominant" = "#27AE60",
      "Emerging/Social Dominant" = "#E74C3C"
    ),
    name = "Performance Segment"
  ) +
  scale_y_continuous(labels = dollar_format(prefix = "$", suffix = "B")) +
  labs(
    title = "Comparative Analysis: Revenue Predictors",
    subtitle = "The high-revenue segment is older on average, but has lower average engagement and fewer followers",
    x = "\nMetric Value",
    y = "Average Revenue ($B)\n",
    caption = "Note: Each panel compares only two segment averages, so the dashed lines show direction, not statistical strength.\nIn this sample, high-revenue brands are older but do not show higher engagement or follower counts; treat these patterns as descriptive only."
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 18, face = "bold", hjust = 0.5, margin = margin(b = 8)),
    plot.subtitle = element_text(size = 13, hjust = 0.5, color = "gray30", margin = margin(b = 15)),
    plot.caption = element_text(size = 11, color = "gray40", hjust = 0, margin = margin(t = 15)),
    axis.title = element_text(size = 13, face = "bold"),
    axis.text = element_text(size = 10, face = "bold"),
    legend.position = "top",
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 11),
    strip.text = element_text(size = 12, face = "bold", color = "white"),
    strip.background = element_rect(fill = "gray30", color = NA),
    panel.grid.major = element_line(color = "gray90"),
    panel.grid.minor = element_blank(),
    plot.background = element_rect(fill = "white", color = NA),
    plot.margin = margin(20, 20, 20, 20)
  )
# Pearson correlation of each metric with revenue, computed across the
# individual brands in brand_data.
# The segment summary has only two rows, and a correlation over two points is
# always exactly +1 or -1, so it cannot be used to compare metrics.
correlation_inputs <- c(
  "Total Followers" = "Total_Social_Media",
  "Engagement Score" = "Combined_Engagement_Score",
  "Brand Age" = "Age"
)
correlation_values <- vapply(
  correlation_inputs,
  function(column) cor(brand_data[[column]], brand_data$Revenue_Billion),
  numeric(1)
)
correlations <- data.frame(
  Metric = names(correlation_inputs),
  Correlation_with_Revenue = unname(correlation_values),
  # Wording is derived from |r| (< 0.3 weak, < 0.7 moderate, otherwise strong);
  # these bands are a rule of thumb, not a significance test. An undefined
  # correlation yields NA here.
  Strength = as.character(cut(
    abs(unname(correlation_values)),
    breaks = c(-Inf, 0.3, 0.7, Inf),
    labels = c("Weak", "Moderate", "Strong"),
    right = FALSE
  ))
)
# Render the correlation table as striped HTML with a dark header row.
# Row shading follows each row's Strength value instead of fixed row
# positions, so the colours stay correct if the rows or their strengths change.
strength_fill <- c(
  Weak = "#fff3cd",
  Moderate = "#d1ecf1",
  Strong = "#d4edda"
)
correlation_table <- correlations %>%
  mutate(Correlation_with_Revenue = round(Correlation_with_Revenue, 3)) %>%
  kable(
    format = "html",
    col.names = c("Metric", "Correlation (r)", "Strength"),
    align = c("l", "c", "c")
  ) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  # Row 0 is the header row
  row_spec(0, bold = TRUE, background = "#34495e", color = "white")
for (row_index in seq_len(nrow(correlations))) {
  # as.character() guards against Strength being a factor, which would index
  # the palette by integer code rather than by name
  row_fill <- strength_fill[as.character(correlations$Strength[row_index])]
  # Rows whose strength is missing or has no palette entry stay unshaded
  if (!is.na(row_fill)) {
    correlation_table <- row_spec(
      correlation_table,
      row_index,
      background = unname(row_fill)
    )
  }
}
correlation_table
| Metric | Correlation (r) | Strength |
|---|---|---|
| Average Followers | -1 | Weak |
| Average Engagement | -1 | Strong |
| Average Brand Age | 1 | Strong |
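Correlation values closer to ±1 indicate stronger relationships. Note that each value shown above is computed from only two data points (the two segment averages), so it is necessarily exactly +1 or -1; only the sign is informative, not the magnitude.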
Report prepared by Brand Intelligence Analytics Team