# Calculate key insights ----
# Headline figures; each row of df is counted as one company.
total_companies <- nrow(df)
total_categories <- length(unique(df$Category_Layer))
total_subcategories <- length(unique(df$Subcategory))

# The three category layers with the most rows (count column is `n`).
top_3_layers <- df %>%
  count(Category_Layer) %>%
  arrange(desc(n)) %>%
  head(n = 3L)

# The three subcategories with the most rows.
top_3_subcats <- df %>%
  count(Subcategory) %>%
  arrange(desc(n)) %>%
  head(n = 3L)

# The single layer holding the largest number of distinct subcategories.
most_diverse <- df %>%
  group_by(Category_Layer) %>%
  summarise(subcats = n_distinct(Subcategory)) %>%
  arrange(desc(subcats)) %>%
  head(n = 1L)
# Write the "Executive Insights" panel to standard output as raw HTML.
# The literal HTML fragments are interleaved with values computed earlier
# (total_companies, total_categories, total_subcategories, top_3_layers,
# top_3_subcats, most_diverse). cat() separates its arguments with a single
# space by default, so every inserted value is surrounded by spaces.
# If top_3_layers or top_3_subcats has fewer than three rows, the missing
# positions are printed as NA.
# NOTE(review): presumably this runs in a knitr chunk with results = "asis" so
# the HTML is rendered rather than shown verbatim - confirm.
# NOTE(review): values are inserted without HTML escaping, and cat() prints a
# factor as its integer codes - assumes the columns are character; confirm.
cat('
<div style="background-color: #f8f9fa; padding: 30px; border-radius: 15px; margin: 20px 0; border: 1px solid #dee2e6;">
<h2 style="color: #2c3e50; text-align: center;">📈 Executive Insights</h2>
<h3 style="color: #495057;">Market Overview</h3>
<ul style="font-size: 16px; color: #333333;">
<li>The AI/ML map contains <strong>', total_companies, ' companies</strong> across <strong>', total_categories, ' major category layers</strong></li>
<li>These companies span <strong>', total_subcategories, ' unique subcategories</strong>, indicating high market specialization</li>
</ul>
<h3 style="color: #495057;">Top Performing Layers</h3>
<ol style="font-size: 16px; color: #333333;">
<li><strong>', top_3_layers$Category_Layer[1], '</strong>: ', top_3_layers$n[1], ' companies</li>
<li><strong>', top_3_layers$Category_Layer[2], '</strong>: ', top_3_layers$n[2], ' companies</li>
<li><strong>', top_3_layers$Category_Layer[3], '</strong>: ', top_3_layers$n[3], ' companies</li>
</ol>
<h3 style="color: #495057;">Most Competitive Subcategories</h3>
<ol style="font-size: 16px; color: #333333;">
<li><strong>', top_3_subcats$Subcategory[1], '</strong>: ', top_3_subcats$n[1], ' companies</li>
<li><strong>', top_3_subcats$Subcategory[2], '</strong>: ', top_3_subcats$n[2], ' companies</li>
<li><strong>', top_3_subcats$Subcategory[3], '</strong>: ', top_3_subcats$n[3], ' companies</li>
</ol>
<h3 style="color: #495057;">Key Observation</h3>
<p style="font-size: 16px; color: #333333;">
The <strong>', most_diverse$Category_Layer, '</strong> layer shows the highest diversity with
<strong>', most_diverse$subcats, ' distinct subcategories</strong>, indicating a mature and
well-segmented market.
</p>
</div>
')
The Infrastructure (Integrate & Operate) layer shows the highest diversity with 9 distinct subcategories, indicating a mature and well-segmented market.
# Row describing the category layer with the most companies (largest first).
top_layer_row <- df %>%
  count(Category_Layer, sort = TRUE) %>%
  slice(1)

# Name of that layer and its share of all companies, in percent (1 decimal).
top_layer <- top_layer_row %>% pull(Category_Layer)
top_layer_pct <- round(pull(top_layer_row, n) / total_companies * 100, 1)
# Write the "KPI Summary" panel to standard output as raw HTML, splicing in
# total_companies, total_categories, total_subcategories, top_layer and
# top_layer_pct. cat() separates its arguments with a single space by default,
# so each inserted value is surrounded by spaces (e.g. "12.3 %").
# NOTE(review): presumably rendered from a knitr chunk with results = "asis";
# values are not HTML-escaped - confirm both are acceptable.
cat('<div style="background-color: #f8f9fa; padding: 20px; border-radius: 10px; margin: 20px 0; border: 1px solid #dee2e6;">
<h2 style="color: #2c3e50;">📊 KPI Summary</h2>
<ul style="font-size: 16px; color: #333333;">
<li><strong>Total Companies:</strong> ', total_companies, '</li>
<li><strong>Category Layers:</strong> ', total_categories, '</li>
<li><strong>Subcategories:</strong> ', total_subcategories, '</li>
<li><strong>Top Layer:</strong> ', top_layer, '</li>
<li><strong>Market Concentration:</strong> Top layer accounts for ', top_layer_pct, '% of companies</li>
</ul>
</div>')
# Number of companies in each category layer, largest first.
layer_counts <- df %>%
  count(Category_Layer) %>%
  arrange(desc(n))

# Horizontal bar chart of the layer counts; bars are ordered by count and each
# bar is labelled with its value just past the bar end.
ggplot(
  layer_counts,
  aes(x = reorder(Category_Layer, n), y = n, fill = Category_Layer)
) +
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = n), hjust = -0.2, size = 4) +
  coord_flip() +
  scale_fill_viridis_d(option = "plasma") +
  ggtitle(
    "Distribution of Companies by Category Layer",
    subtitle = "Total company count across major AI/ML categories"
  ) +
  xlab(NULL) +
  ylab("Number of Companies") +
  theme(
    plot.title = element_text(size = 18, face = "bold"),
    plot.subtitle = element_text(size = 12, color = "gray60"),
    panel.grid.major.y = element_blank()
  )
# Company count per subcategory, restricted to the 20 largest. Ties at the
# cut-off are kept, so more than 20 rows are possible. slice_max() is the
# documented replacement for the superseded top_n().
subcat_counts <- df %>%
  count(Subcategory, sort = TRUE) %>%
  slice_max(order_by = n, n = 20)

# Lollipop chart: a stem from 0 to the count plus a dot at the count, with the
# value printed beside the dot. Stem thickness is set through `linewidth`;
# using `size` for line widths is deprecated since ggplot2 3.4.0.
ggplot(subcat_counts, aes(x = reorder(Subcategory, n), y = n)) +
  geom_segment(
    aes(xend = Subcategory, y = 0, yend = n),
    color = "#3498db",
    linewidth = 1.5
  ) +
  geom_point(size = 5, color = "#e74c3c") +
  geom_text(aes(label = n), hjust = -0.5, size = 3.5) +
  coord_flip() +
  labs(
    title = "Top 20 Subcategories by Company Count",
    subtitle = "Lollipop chart showing market concentration",
    x = NULL,
    y = "Number of Companies"
  ) +
  theme(
    plot.title = element_text(size = 18, face = "bold"),
    panel.grid.major.y = element_blank()
  )
# One row per (layer, subcategory) pair with its company count, largest first.
treemap_data <- df %>%
  count(Category_Layer, Subcategory, sort = TRUE)

# Interpolate a seven-stop rainbow into one colour per treemap_data row.
# NOTE(review): with type = "index" the palette may be applied per top-level
# index value, in which case only the first few ramp colours are used -
# confirm against the treemap documentation.
n_colors <- nrow(treemap_data)
rainbow_colors <- colorRampPalette(
  c(
    "#FF0000", "#FF7F00", "#FFFF00", "#00FF00",
    "#0000FF", "#4B0082", "#9400D3"
  )
)(n_colors)

# Two-level treemap: layers contain subcategories, rectangle area is the count.
treemap(
  treemap_data,
  index = c("Category_Layer", "Subcategory"),
  vSize = "n",
  type = "index",
  palette = rainbow_colors,
  title = "AI/ML Market Map - Hierarchical View",
  fontsize.labels = c(14, 10),
  fontcolor.labels = c("white", "black"),
  bg.labels = "transparent",
  align.labels = list(c("left", "top"), c("center", "center")),
  border.col = c("white", "gray80"),
  border.lwds = c(3, 1)
)
# Subcategory counts (largest first) with running totals:
#   cumulative     - running sum of n
#   cumulative_pct - running sum as a percentage of all counted rows
#   rank           - position in the sorted table
subcat_summary <- df %>%
  count(Subcategory, sort = TRUE) %>%
  mutate(
    cumulative = cumsum(n),
    cumulative_pct = cumulative / sum(n) * 100,
    rank = row_number()
  )

# Bar chart of the 30 largest subcategories, shaded by cumulative share.
# slice_max() replaces the superseded top_n(); ties at the cut-off are kept,
# exactly as before.
ggplot(
  subcat_summary %>% slice_max(order_by = n, n = 30),
  aes(x = reorder(Subcategory, n), y = n, fill = cumulative_pct)
) +
  geom_col() +
  coord_flip() +
  scale_fill_gradient(low = "#3498db", high = "#e74c3c", name = "Cumulative %") +
  labs(
    title = "Pareto Analysis of AI/ML Subcategories",
    subtitle = "Top 30 subcategories by company count",
    x = NULL,
    y = "Number of Companies"
  ) +
  theme(
    plot.title = element_text(size = 18, face = "bold")
  )
# For every category layer keep its five largest subcategories.
#   pct  - subcategory share of the layer's companies, in percent
#   rank - position of the subcategory within its layer (1 = largest)
# arrange() keeps the grouping, so a single group_by() is sufficient. The
# result is left grouped by Category_Layer, as it was before.
layer_subcat <- df %>%
  count(Category_Layer, Subcategory) %>%
  group_by(Category_Layer) %>%
  mutate(pct = n / sum(n) * 100) %>%
  arrange(Category_Layer, desc(n)) %>%
  mutate(rank = row_number()) %>%
  filter(rank <= 5) # Top 5 per layer

# Stacked horizontal bars, one bar per layer, one segment per subcategory.
# - Layers are ordered by the SUM of their segments, which is the visible bar
#   length; reorder()'s default (mean) mis-orders layers that have fewer than
#   five subcategories.
# - The fill palette is sized to the number of subcategories shown. The
#   ColorBrewer "Set3" palette stops at 12 colours, which is fewer than
#   5 subcategories per layer can require; hcl.colors() "Set 3" is the base-R
#   qualitative palette of arbitrary length.
ggplot(
  layer_subcat,
  aes(x = reorder(Category_Layer, n, FUN = sum), y = n, fill = Subcategory)
) +
  geom_col(position = "stack") +
  coord_flip() +
  scale_fill_manual(
    values = hcl.colors(n_distinct(layer_subcat$Subcategory), palette = "Set 3")
  ) +
  labs(
    title = "Top Subcategories within Each Category Layer",
    subtitle = "Showing top 5 subcategories per layer",
    x = NULL,
    y = "Number of Companies",
    fill = "Subcategory"
  ) +
  theme(
    plot.title = element_text(size = 18, face = "bold"),
    legend.position = "bottom",
    legend.text = element_text(size = 8)
  ) +
  guides(fill = guide_legend(nrow = 3))
# Per-layer diversity: distinct subcategories divided by company count.
# Inside summarise() `total_companies` is the per-layer row count and masks the
# script-level variable of the same name.
diversity_data <- df %>%
  group_by(Category_Layer) %>%
  summarise(
    total_companies = n(),
    unique_subcats = n_distinct(Subcategory),
    diversity_index = unique_subcats / total_companies
  ) %>%
  arrange(desc(diversity_index))

# Horizontal bars ordered by the diversity index, one rainbow colour per layer.
ggplot(
  diversity_data,
  aes(
    x = reorder(Category_Layer, diversity_index),
    y = diversity_index,
    fill = Category_Layer
  )
) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  scale_fill_manual(values = rainbow(nrow(diversity_data))) +
  ggtitle(
    "Subcategory Diversity by Layer",
    subtitle = "Higher values indicate more diverse subcategory distribution"
  ) +
  xlab(NULL) +
  ylab("Diversity Index (Unique Subcategories / Total Companies)") +
  theme(plot.title = element_text(size = 18, face = "bold"))
# Per-layer comparison table: company count, distinct subcategories, and the
# average number of companies per subcategory (rounded to one decimal).
layer_metrics <- df %>%
  group_by(Category_Layer) %>%
  summarise(
    Companies = n(),
    Subcategories = n_distinct(Subcategory),
    Avg_Companies_per_Subcat = round(Companies / Subcategories, 1)
  ) %>%
  arrange(desc(Companies))

# Render as a DT table showing only the table body (dom = "t"), without row
# names, with a 14px font on all four columns.
datatable(
  layer_metrics,
  options = list(pageLength = 10, dom = "t"),
  rownames = FALSE,
  caption = "Category Layer Metrics Comparison"
) %>%
  formatStyle(columns = 1:4, fontSize = "14px")
# Subcategory counts for layers whose name contains "Infrastructure".
infra_data <- df %>%
  filter(str_detect(Category_Layer, "Infrastructure")) %>%
  count(Subcategory) %>%
  arrange(desc(n))

# Draw the chart only when at least one matching row exists.
if (nrow(infra_data) > 0) {
  ggplot(
    infra_data,
    aes(x = reorder(Subcategory, n), y = n, fill = Subcategory)
  ) +
    geom_col(show.legend = FALSE) +
    geom_text(aes(label = n), hjust = -0.2, size = 4) +
    coord_flip() +
    scale_fill_manual(values = rainbow(nrow(infra_data))) +
    ggtitle(
      "Infrastructure Layer Subcategories",
      subtitle = "Company distribution within infrastructure"
    ) +
    xlab(NULL) +
    ylab("Number of Companies") +
    theme(
      plot.title = element_text(size = 18, face = "bold"),
      panel.grid.major.y = element_blank()
    )
}
# Counts per (layer, subcategory) for layers whose name contains "ML/AI".
mlai_data <- df %>%
  filter(str_detect(Category_Layer, "ML/AI")) %>%
  count(Category_Layer, Subcategory, sort = TRUE)

# Draw the chart only when at least one matching row exists.
if (nrow(mlai_data) > 0) {
  # 20 largest rows (ties at the cut-off are kept). slice_max() is the
  # documented replacement for the superseded top_n().
  top_mlai <- mlai_data %>% slice_max(order_by = n, n = 20)

  # NOTE(review): rows are per (layer, subcategory); a subcategory present in
  # several ML/AI layers would be drawn as one stacked bar - confirm intended.
  # The former legend.position setting was dropped: the only layer uses
  # show.legend = FALSE, so no legend is drawn.
  ggplot(
    top_mlai,
    aes(x = reorder(Subcategory, n), y = n, fill = Subcategory)
  ) +
    geom_col(show.legend = FALSE) +
    coord_flip() +
    scale_fill_manual(values = rainbow(nrow(top_mlai))) +
    labs(
      title = "Top ML/AI Subcategories",
      subtitle = "Focus on Build, Validate & Secure, Operate & Supply layers",
      x = NULL,
      y = "Number of Companies"
    ) +
    theme(
      plot.title = element_text(size = 18, face = "bold")
    )
}
# Subcategory counts for layers whose name contains "Applications".
apps_data <- df %>%
  filter(str_detect(Category_Layer, "Applications")) %>%
  count(Subcategory) %>%
  arrange(desc(n))

# Draw the chart only when at least one matching row exists.
if (nrow(apps_data) > 0) {
  ggplot(
    apps_data,
    aes(x = reorder(Subcategory, n), y = n, fill = Subcategory)
  ) +
    geom_col(show.legend = FALSE) +
    geom_text(aes(label = n), hjust = -0.2, size = 3.5) +
    coord_flip() +
    scale_fill_manual(values = rainbow(nrow(apps_data))) +
    ggtitle(
      "Applications Layer Subcategories",
      subtitle = "Horizontal applications distribution"
    ) +
    xlab(NULL) +
    ylab("Number of Companies") +
    theme(
      plot.title = element_text(size = 18, face = "bold"),
      panel.grid.major.y = element_blank()
    )
}
# Company count per category layer, largest first.
layer_counts <- count(df, Category_Layer, sort = TRUE)

# Interactive bar chart: layers on the x axis ordered by count, one rainbow
# colour per bar, hover text showing only the company count.
layer_plot <- plot_ly(
  data = layer_counts,
  x = ~reorder(Category_Layer, n),
  y = ~n,
  type = "bar",
  marker = list(color = rainbow(nrow(layer_counts))),
  text = ~paste("Companies:", n),
  hoverinfo = "text"
) %>%
  layout(
    title = "Interactive Category Layer Distribution",
    xaxis = list(title = ""),
    yaxis = list(title = "Number of Companies"),
    showlegend = FALSE
  )

# Display the widget.
layer_plot
Report generated by Victor Shamanovsky
2025 AI/ML Market Map Analysis
© 2025 All rights reserved.