1 EXECUTIVE SUMMARY

# Headline figures reused by the summary panels further down
total_companies <- nrow(df)
total_categories <- n_distinct(df$Category_Layer)
total_subcategories <- n_distinct(df$Subcategory)

# Three largest category layers by company count
top_3_layers <- head(count(df, Category_Layer, sort = TRUE), 3)

# Three largest subcategories by company count
top_3_subcats <- head(count(df, Subcategory, sort = TRUE), 3)

# Distinct subcategories per layer; the first row after a descending sort
# is the layer reported as "most diverse"
layer_diversity <- summarise(
  group_by(df, Category_Layer),
  subcats = n_distinct(Subcategory)
)
most_diverse <- slice(arrange(layer_diversity, desc(subcats)), 1)

# Emit the executive-summary panel as raw HTML built from the figures
# computed above (totals, top-3 tables, most diverse layer).
# sep = "" is required: cat() otherwise inserts a single space between every
# argument, which pads each interpolated value inside its <strong> tag and
# renders as "Name : 416 companies". The string literals already carry all
# the spacing the sentences need.
cat('
<div style="background-color: #f8f9fa; padding: 30px; border-radius: 15px; margin: 20px 0; border: 1px solid #dee2e6;">
  <h2 style="color: #2c3e50; text-align: center;">📈 Executive Insights</h2>
  
  <h3 style="color: #495057;">Market Overview</h3>
  <ul style="font-size: 16px; color: #333333;">
    <li>The AI/ML map contains <strong>', total_companies, ' companies</strong> across <strong>', total_categories, ' major category layers</strong></li>
    <li>These companies span <strong>', total_subcategories, ' unique subcategories</strong>, indicating high market specialization</li>
  </ul>
  
  <h3 style="color: #495057;">Top Performing Layers</h3>
  <ol style="font-size: 16px; color: #333333;">
    <li><strong>', top_3_layers$Category_Layer[1], '</strong>: ', top_3_layers$n[1], ' companies</li>
    <li><strong>', top_3_layers$Category_Layer[2], '</strong>: ', top_3_layers$n[2], ' companies</li>
    <li><strong>', top_3_layers$Category_Layer[3], '</strong>: ', top_3_layers$n[3], ' companies</li>
  </ol>
  
  <h3 style="color: #495057;">Most Competitive Subcategories</h3>
  <ol style="font-size: 16px; color: #333333;">
    <li><strong>', top_3_subcats$Subcategory[1], '</strong>: ', top_3_subcats$n[1], ' companies</li>
    <li><strong>', top_3_subcats$Subcategory[2], '</strong>: ', top_3_subcats$n[2], ' companies</li>
    <li><strong>', top_3_subcats$Subcategory[3], '</strong>: ', top_3_subcats$n[3], ' companies</li>
  </ol>
  
  <h3 style="color: #495057;">Key Observation</h3>
  <p style="font-size: 16px; color: #333333;">
    The <strong>', most_diverse$Category_Layer, '</strong> layer shows the highest diversity with 
    <strong>', most_diverse$subcats, ' distinct subcategories</strong>, indicating a mature and 
    well-segmented market.
  </p>
</div>
', sep = "")

📈 Executive Insights

Market Overview

The AI/ML map contains 1156 companies across 9 major category layers
These companies span 36 unique subcategories, indicating high market specialization

Top Performing Layers

Applications (Horizontal): 416 companies
Infrastructure (Integrate & Operate): 168 companies
Data & AI Advisory: 129 companies

Most Competitive Subcategories

Generative Media: 319 companies
Speech & Voice Intelligence: 83 companies
Machine Learning Operations (MLOps): 77 companies

Key Observation

The Infrastructure (Integrate & Operate) layer shows the highest diversity with 9 distinct subcategories, indicating a mature and well-segmented market.

2 KPI SUMMARY

# Largest category layer and its share of all companies (one decimal place).
# The ranking is computed once and both values are read from its first row.
leading_layer <- slice(count(df, Category_Layer, sort = TRUE), 1)
top_layer <- pull(leading_layer, Category_Layer)
top_layer_pct <- round(pull(leading_layer, n) / total_companies * 100, 1)

# Emit the KPI panel as raw HTML from the totals and top-layer figures.
# sep = "" suppresses cat()'s default single-space separator, which would
# otherwise render the share as "36 % of companies" and add a stray space
# before every closing </li>.
cat('<div style="background-color: #f8f9fa; padding: 20px; border-radius: 10px; margin: 20px 0; border: 1px solid #dee2e6;">
  <h2 style="color: #2c3e50;">📊 KPI Summary</h2>
  <ul style="font-size: 16px; color: #333333;">
    <li><strong>Total Companies:</strong> ', total_companies, '</li>
    <li><strong>Category Layers:</strong> ', total_categories, '</li>
    <li><strong>Subcategories:</strong> ', total_subcategories, '</li>
    <li><strong>Top Layer:</strong> ', top_layer, '</li>
    <li><strong>Market Concentration:</strong> Top layer accounts for ', top_layer_pct, '% of companies</li>
  </ul>
</div>', sep = "")

📊 KPI Summary

Total Companies: 1156
Category Layers: 9
Subcategories: 36
Top Layer: Applications (Horizontal)
Market Concentration: Top layer accounts for 36% of companies

3 AI MAP DATA VISUALIZATIONS

3.1 Category Layer Distribution

# Company totals per category layer, largest first
layer_counts <- count(df, Category_Layer, sort = TRUE)

# Horizontal bar chart: one bar per layer, ordered by size, each bar
# labelled with its company count just past the bar end
ggplot(
  data = layer_counts,
  mapping = aes(x = reorder(Category_Layer, n), y = n, fill = Category_Layer)
) +
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = n), hjust = -0.2, size = 4) +
  scale_fill_viridis_d(option = "plasma") +
  coord_flip() +
  ggtitle(
    "Distribution of Companies by Category Layer",
    subtitle = "Total company count across major AI/ML categories"
  ) +
  xlab(NULL) +
  ylab("Number of Companies") +
  theme(
    panel.grid.major.y = element_blank(),
    plot.subtitle = element_text(size = 12, color = "gray60"),
    plot.title = element_text(size = 18, face = "bold")
  )

3.2 Top 20 Subcategories

# Company totals per subcategory, keeping the 20 largest (ties included)
subcat_counts <- df %>%
  count(Subcategory, sort = TRUE) %>%
  top_n(20, n)

# Lollipop chart: a stem from zero to the count, a dot at the count, and the
# count printed next to the dot
ggplot(subcat_counts, aes(x = reorder(Subcategory, n), y = n)) +
  # linewidth (not size) sets stem thickness: ggplot2 3.4.0 deprecated `size`
  # for lines and warns when it is used on geom_segment()
  geom_segment(aes(xend = Subcategory, y = 0, yend = n), color = "#3498db", linewidth = 1.5) +
  geom_point(size = 5, color = "#e74c3c") +
  geom_text(aes(label = n), hjust = -0.5, size = 3.5) +
  coord_flip() +
  labs(
    title = "Top 20 Subcategories by Company Count",
    subtitle = "Lollipop chart showing market concentration",
    x = NULL,
    y = "Number of Companies"
  ) +
  theme(
    plot.title = element_text(size = 18, face = "bold"),
    panel.grid.major.y = element_blank()
  )

3.3 Treemap of Categories

# One row per (layer, subcategory) pair with its company count, largest first
treemap_data <- df %>%
  count(Category_Layer, Subcategory) %>%
  arrange(desc(n))

# Rainbow palette with one colour per category layer.
# With type = "index" the palette is assigned to the levels of the FIRST index
# variable (Category_Layer); subcategory tiles are shaded from their parent's
# colour. Sizing the ramp by nrow(treemap_data) would leave only the first few
# red/orange steps in use, so the ramp is sized to the number of layers to
# spread them across the whole spectrum.
n_colors <- n_distinct(treemap_data$Category_Layer)
rainbow_colors <- colorRampPalette(c("#FF0000", "#FF7F00", "#FFFF00", "#00FF00", 
                                      "#0000FF", "#4B0082", "#9400D3"))(n_colors)

# Two-level treemap: layers as outer tiles, subcategories nested inside,
# tile area proportional to company count
treemap(treemap_data,
        index = c("Category_Layer", "Subcategory"),
        vSize = "n",
        type = "index",
        palette = rainbow_colors,
        title = "AI/ML Market Map - Hierarchical View",
        fontsize.labels = c(14, 10),
        fontcolor.labels = c("white", "black"),
        bg.labels = c("transparent"),
        align.labels = list(c("left", "top"), c("center", "center")),
        border.col = c("white", "gray80"),
        border.lwds = c(3, 1))

4 CATEGORY-LEVEL INSIGHTS

4.1 Market Concentration Analysis

# Subcategories ranked by company count, with running total, running share
# of all companies (in percent) and rank position
subcat_summary <- mutate(
  count(df, Subcategory, sort = TRUE),
  cumulative = cumsum(n),
  cumulative_pct = cumulative / sum(n) * 100,
  rank = row_number()
)

# Only the 30 largest subcategories (ties included) are plotted
pareto_top30 <- top_n(subcat_summary, 30, n)

# Horizontal bars ordered by count; fill encodes the cumulative share
ggplot(
  pareto_top30,
  aes(x = reorder(Subcategory, n), y = n, fill = cumulative_pct)
) +
  geom_col() +
  scale_fill_gradient(name = "Cumulative %", low = "#3498db", high = "#e74c3c") +
  coord_flip() +
  ggtitle(
    "Pareto Analysis of AI/ML Subcategories",
    subtitle = "Top 30 subcategories by company count"
  ) +
  xlab(NULL) +
  ylab("Number of Companies") +
  theme(plot.title = element_text(size = 18, face = "bold"))

4.2 Subcategory Distribution by Layer

# Top 5 subcategories within each category layer, with each subcategory's
# share of its layer (pct) and its within-layer rank
layer_subcat <- df %>%
  count(Category_Layer, Subcategory) %>%
  group_by(Category_Layer) %>%
  mutate(pct = n / sum(n) * 100) %>%
  arrange(Category_Layer, desc(n)) %>%  # arrange() keeps the grouping
  mutate(rank = row_number()) %>%
  filter(rank <= 5) %>%  # Top 5 per layer
  ungroup()  # do not leak the grouping into later steps

# Fill colours: the Brewer "Set3" palette only defines 12 colours, and
# scale_fill_brewer() leaves every level past the 12th without a fill.
# Use Set3 as-is when it is large enough, otherwise interpolate it to one
# colour per plotted subcategory.
set3_base <- RColorBrewer::brewer.pal(12, "Set3")
n_subcat_fills <- n_distinct(layer_subcat$Subcategory)
subcat_fills <- if (n_subcat_fills <= length(set3_base)) {
  set3_base[seq_len(n_subcat_fills)]
} else {
  colorRampPalette(set3_base)(n_subcat_fills)
}

# Stacked horizontal bars. FUN = sum orders layers by the height of the whole
# stack; reorder()'s default (mean) would order them by the average segment.
ggplot(layer_subcat, aes(x = reorder(Category_Layer, n, FUN = sum), y = n, fill = Subcategory)) +
  geom_col(position = "stack") +
  coord_flip() +
  scale_fill_manual(values = subcat_fills) +
  labs(
    title = "Top Subcategories within Each Category Layer",
    subtitle = "Showing top 5 subcategories per layer",
    x = NULL,
    y = "Number of Companies",
    fill = "Subcategory"
  ) +
  theme(
    plot.title = element_text(size = 18, face = "bold"),
    legend.position = "bottom",
    legend.text = element_text(size = 8)
  ) +
  guides(fill = guide_legend(nrow = 3))

5 COMPARATIVE ANALYSIS

5.1 Diversity Index by Layer

# Per-layer company total, distinct subcategory count, and their ratio
# (unique subcategories per company), sorted from highest ratio to lowest
diversity_data <- arrange(
  summarise(
    group_by(df, Category_Layer),
    total_companies = n(),
    unique_subcats = n_distinct(Subcategory),
    diversity_index = unique_subcats / total_companies
  ),
  desc(diversity_index)
)

# Horizontal bars ordered by the ratio, one rainbow colour per layer
ggplot(
  diversity_data,
  aes(
    x = reorder(Category_Layer, diversity_index),
    y = diversity_index,
    fill = Category_Layer
  )
) +
  geom_col(show.legend = FALSE) +
  scale_fill_manual(values = rainbow(nrow(diversity_data))) +
  coord_flip() +
  ggtitle(
    "Subcategory Diversity by Layer",
    subtitle = "Higher values indicate more diverse subcategory distribution"
  ) +
  xlab(NULL) +
  ylab("Diversity Index (Unique Subcategories / Total Companies)") +
  theme(plot.title = element_text(size = 18, face = "bold"))

5.2 Layer Comparison Matrix

# Per-layer metrics: company count, distinct subcategories, and the average
# number of companies per subcategory (one decimal), largest layer first
layer_metrics <- arrange(
  summarise(
    group_by(df, Category_Layer),
    Companies = n(),
    Subcategories = n_distinct(Subcategory),
    Avg_Companies_per_Subcat = round(n() / n_distinct(Subcategory), 1)
  ),
  desc(Companies)
)

# Table-only widget (dom = "t": no search box or paging controls) with a
# uniform font size on all four columns
metrics_table <- datatable(
  layer_metrics,
  caption = "Category Layer Metrics Comparison",
  rownames = FALSE,
  options = list(pageLength = 10, dom = "t")
)
formatStyle(metrics_table, columns = 1:4, fontSize = "14px")

6 DETAILED BREAKDOWNS

6.1 Infrastructure Layer Deep Dive

# Subcategory counts for every layer whose name contains "Infrastructure"
infra_data <- count(
  filter(df, str_detect(Category_Layer, "Infrastructure")),
  Subcategory,
  sort = TRUE
)

# Draw the chart only when at least one matching row exists
if (nrow(infra_data) > 0) {
  ggplot(
    infra_data,
    aes(x = reorder(Subcategory, n), y = n, fill = Subcategory)
  ) +
    geom_col(show.legend = FALSE) +
    geom_text(aes(label = n), hjust = -0.2, size = 4) +
    scale_fill_manual(values = rainbow(nrow(infra_data))) +
    coord_flip() +
    ggtitle(
      "Infrastructure Layer Subcategories",
      subtitle = "Company distribution within infrastructure"
    ) +
    xlab(NULL) +
    ylab("Number of Companies") +
    theme(
      panel.grid.major.y = element_blank(),
      plot.title = element_text(size = 18, face = "bold")
    )
}

6.2 ML/AI Build & Validate

# (layer, subcategory) counts for every layer whose name contains "ML/AI"
mlai_data <- count(
  filter(df, str_detect(Category_Layer, "ML/AI")),
  Category_Layer, Subcategory,
  sort = TRUE
)

# Draw the chart only when at least one matching row exists
if (nrow(mlai_data) > 0) {
  # Keep the 20 largest rows (ties included)
  top_mlai <- top_n(mlai_data, 20, n)
  ggplot(
    top_mlai,
    aes(x = reorder(Subcategory, n), y = n, fill = Subcategory)
  ) +
    geom_col(show.legend = FALSE) +
    scale_fill_manual(values = rainbow(nrow(top_mlai))) +
    coord_flip() +
    ggtitle(
      "Top ML/AI Subcategories",
      subtitle = "Focus on Build, Validate & Secure, Operate & Supply layers"
    ) +
    xlab(NULL) +
    ylab("Number of Companies") +
    theme(
      legend.position = "bottom",
      plot.title = element_text(size = 18, face = "bold")
    )
}

6.3 Applications Layer

# Subcategory counts for every layer whose name contains "Applications"
apps_data <- count(
  filter(df, str_detect(Category_Layer, "Applications")),
  Subcategory,
  sort = TRUE
)

# Draw the chart only when at least one matching row exists
if (nrow(apps_data) > 0) {
  ggplot(
    apps_data,
    aes(x = reorder(Subcategory, n), y = n, fill = Subcategory)
  ) +
    geom_col(show.legend = FALSE) +
    geom_text(aes(label = n), hjust = -0.2, size = 3.5) +
    scale_fill_manual(values = rainbow(nrow(apps_data))) +
    coord_flip() +
    ggtitle(
      "Applications Layer Subcategories",
      subtitle = "Horizontal applications distribution"
    ) +
    xlab(NULL) +
    ylab("Number of Companies") +
    theme(
      panel.grid.major.y = element_blank(),
      plot.title = element_text(size = 18, face = "bold")
    )
}

7 INTERACTIVE VISUALIZATIONS

7.1 Interactive Layer Explorer

# Company totals per category layer, largest first
layer_counts <- count(df, Category_Layer, sort = TRUE)

# Bar trace: one bar per layer, coloured from a rainbow palette, with a
# hover label showing only the company count
layer_bars <- plot_ly(
  data = layer_counts,
  x = ~reorder(Category_Layer, n),
  y = ~n,
  type = "bar",
  marker = list(color = rainbow(nrow(layer_counts))),
  text = ~paste("Companies:", n),
  hoverinfo = "text"
)

# Titles and axis labels; the legend is hidden
layer_plot <- layer_bars %>%
  layout(
    title = "Interactive Category Layer Distribution",
    xaxis = list(title = ""),
    yaxis = list(title = "Number of Companies"),
    showlegend = FALSE
  )

layer_plot

Report generated by Victor Shamanovsky

2025 AI/ML Market Map Analysis

2025 AI MAP DATA

EXPLORATORY ANALYSIS OF AI/ML COMPANIES, CATEGORIES, AND MARKET MAP

Victor Shamanovsky

Sunday, November 02, 2025 at 09:32 PM EST