Project Overview

This analysis examines Insurance Agency Performance across 10 branches over FY2023–FY2024, evaluating 30 agents across 4 regions using key BFSI metrics including premium growth, agent retention, policy conversion rates, and regional market share.


Data Preparation

# Create synthetic agency performance dataset
set.seed(42)

agency_data <- data.frame(
  Agent_ID = rep(paste0("AG", 1001:1030), each = 24),
  Branch = rep(c("Kolkata North", "Kolkata South", "Howrah", "Durgapur", 
                  "Siliguri", "Asansol", "Kharagpur", "Darjeeling", 
                  "Purulia", "Bankura"), each = 72),
  Region = rep(c("Metro", "Metro", "Metro", "Tier-2", 
                  "Tier-2", "Tier-2", "Tier-2", "Rural", 
                  "Rural", "Rural"), each = 72),
  Month = rep(1:12, each = 2, length.out = 720),
  Year = rep(c(2023, 2024), 360),
  Premium_Generated = round(rnorm(720, mean = 125000, sd = 45000), 0),
  Policies_Sold = round(rnorm(720, mean = 15, sd = 5), 0),
  Customer_Satisfaction = round(rnorm(720, mean = 4.2, sd = 0.6), 1),
  Retention_Rate = round(rnorm(720, mean = 0.78, sd = 0.12), 2)
)

# Ensure positive values
agency_data$Premium_Generated <- abs(agency_data$Premium_Generated)
agency_data$Policies_Sold <- pmax(agency_data$Policies_Sold, 1)
agency_data$Retention_Rate <- pmin(pmax(agency_data$Retention_Rate, 0.5), 1)

# Add derived metrics
agency_data <- agency_data %>%
  group_by(Agent_ID, Year) %>%
  mutate(
    Avg_Premium_Per_Policy = Premium_Generated / Policies_Sold,
    Cumulative_Premium = cumsum(Premium_Generated),
    Agent_Performance_Score = (Policies_Sold / mean(Policies_Sold)) * 
                              (Customer_Satisfaction / 5) * 100
  ) %>%
  ungroup()

head(agency_data, 10)
## # A tibble: 10 × 12
##    Agent_ID Branch        Region Month  Year Premium_Generated Policies_Sold
##    <chr>    <chr>         <chr>  <int> <dbl>             <dbl>         <dbl>
##  1 AG1001   Kolkata North Metro      1  2023            186693            17
##  2 AG1001   Kolkata North Metro      1  2024             99589            22
##  3 AG1001   Kolkata North Metro      2  2023            141341            13
##  4 AG1001   Kolkata North Metro      2  2024            153479            16
##  5 AG1001   Kolkata North Metro      3  2023            143192            17
##  6 AG1001   Kolkata North Metro      3  2024            120224            22
##  7 AG1001   Kolkata North Metro      4  2023            193018            18
##  8 AG1001   Kolkata North Metro      4  2024            120740            10
##  9 AG1001   Kolkata North Metro      5  2023            215829            12
## 10 AG1001   Kolkata North Metro      5  2024            122178            11
## # ℹ 5 more variables: Customer_Satisfaction <dbl>, Retention_Rate <dbl>,
## #   Avg_Premium_Per_Policy <dbl>, Cumulative_Premium <dbl>,
## #   Agent_Performance_Score <dbl>

Visualization 1: Premium Growth by Branch (Line Plot)

Purpose: Track revenue performance across branches over time.

# Aggregate by branch and month
branch_monthly <- agency_data %>%
  group_by(Branch, Month, Year) %>%
  summarise(Total_Premium = sum(Premium_Generated), .groups = 'drop') %>%
  mutate(Date = as.numeric(paste0(Year, sprintf("%02d", Month))))

# Create line plot
p1 <- ggplot(branch_monthly, aes(x = interaction(Month, Year), y = Total_Premium, 
                                 color = Branch, group = Branch)) +
  geom_line(size = 1.2) +
  geom_point(size = 2) +
  facet_wrap(~Year, scales = "free_x") +
  scale_color_viridis(discrete = TRUE, option = "turbo") +
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "Premium Revenue Growth by Branch (FY2023-2024)",
    x = "Month",
    y = "Total Premium (₹)",
    color = "Branch"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "bottom")

p1


Visualization 2: Policy Conversion Rate Heatmap

Purpose: Identify high-performing and underperforming agents across regions.

# Calculate conversion rates by agent and region
agent_performance <- agency_data %>%
  group_by(Agent_ID, Region) %>%
  summarise(
    Avg_Policies = mean(Policies_Sold),
    Avg_Premium = mean(Premium_Generated),
    Conversion_Rate = (mean(Policies_Sold) / 20) * 100,
    .groups = 'drop'
  )

# Create heatmap
p2 <- ggplot(agent_performance, aes(x = Region, y = Agent_ID, fill = Conversion_Rate)) +
  geom_tile(color = "white", size = 0.5) +
  scale_fill_viridis(direction = -1, name = "Conversion\nRate (%)") +
  labs(
    title = "Agent Performance Heatmap: Policy Conversion Rate by Region",
    x = "Region",
    y = "Agent ID"
  ) +
  theme(axis.text.y = element_text(size = 7), legend.position = "right")

p2


Visualization 3: Regional Market Share (Treemap)

Purpose: Show portfolio composition by region and contribution.

# Aggregate by region
regional_share <- agency_data %>%
  group_by(Region) %>%
  summarise(
    Total_Premium = sum(Premium_Generated),
    Total_Policies = sum(Policies_Sold),
    Agent_Count = n_distinct(Agent_ID),
    .groups = 'drop'
  )

# Create treemap
p3 <- ggplot(regional_share, aes(area = Total_Premium, fill = Total_Policies, 
                                  label = Region)) +
  geom_treemap() +
  geom_treemap_text(fontface = "bold", colour = "white", place = "centre", 
                    size = 14, grow = TRUE) +
  scale_fill_viridis(name = "Policies Sold") +
  labs(title = "Regional Market Share & Policy Volume") +
  theme(legend.position = "right")

p3


Visualization 4: Customer Satisfaction Distribution (Box Plot)

Purpose: Compare satisfaction scores across branches.

# Create box plot
p4 <- ggplot(agency_data, aes(x = reorder(Branch, Customer_Satisfaction, FUN = median), 
                              y = Customer_Satisfaction, fill = Region)) +
  geom_boxplot(alpha = 0.7, outlier.size = 2) +
  geom_jitter(width = 0.2, alpha = 0.3, size = 1.5) +
  scale_fill_viridis(discrete = TRUE, option = "mako") +
  scale_y_continuous(limits = c(2.5, 5)) +
  coord_flip() +
  labs(
    title = "Customer Satisfaction Distribution by Branch",
    x = "Branch",
    y = "Satisfaction Score (out of 5)",
    fill = "Region"
  ) +
  theme(legend.position = "bottom")

p4


Visualization 5: Agent Performance Ranking (Bar Chart)

Purpose: Rank top and bottom performers by composite score.

# Calculate agent performance score
top_agents <- agency_data %>%
  group_by(Agent_ID, Branch) %>%
  summarise(
    Performance_Score = mean(Agent_Performance_Score),
    Avg_Retention = mean(Retention_Rate),
    .groups = 'drop'
  ) %>%
  arrange(desc(Performance_Score)) %>%
  mutate(Rank = row_number(), Tier = case_when(
    Rank <= 5 ~ "Top 5",
    Rank <= 15 ~ "Middle Tier",
    TRUE ~ "Development"
  )) %>%
  filter(Rank <= 15)

# Create bar plot
p5 <- ggplot(top_agents, aes(x = reorder(Agent_ID, Performance_Score), 
                             y = Performance_Score, fill = Tier)) +
  geom_col(alpha = 0.85) +
  geom_text(aes(label = round(Performance_Score, 1)), hjust = -0.2, size = 3.5) +
  scale_fill_manual(values = c("Top 5" = "#2ecc71", "Middle Tier" = "#f39c12", 
                               "Development" = "#e74c3c")) +
  coord_flip() +
  labs(
    title = "Top 15 Agents by Composite Performance Score",
    x = "Agent ID",
    y = "Performance Score",
    fill = "Performance Tier"
  ) +
  theme(legend.position = "bottom")

p5


Visualization 6: Premium vs. Policies Scatter Plot

Purpose: Analyze efficiency correlation between premium volume and policy count.

# Aggregate by agent
agent_efficiency <- agency_data %>%
  group_by(Agent_ID, Region) %>%
  summarise(
    Total_Premium = sum(Premium_Generated),
    Total_Policies = sum(Policies_Sold),
    Avg_Satisfaction = mean(Customer_Satisfaction),
    .groups = 'drop'
  )

# Create scatter plot with plotly for interactivity
p6 <- plot_ly(agent_efficiency, 
              x = ~Total_Policies, 
              y = ~Total_Premium,
              color = ~Region,
              size = ~Avg_Satisfaction,
              text = ~Agent_ID,
              mode = 'markers',
              type = 'scatter',
              marker = list(opacity = 0.7, sizemode = 'diameter', sizeref = 2)) %>%
  layout(
    title = list(text = "Interactive: Premium Generated vs. Policies Sold"),
    xaxis = list(title = "Total Policies Sold"),
    yaxis = list(title = "Total Premium Generated (₹)"),
    hovermode = "closest",
    showlegend = TRUE
  )

p6

Visualization 7: Retention Rate Trend (Interactive Line Chart)

Purpose: Track retention performance trends by region with interactivity.

# Aggregate retention by region and month
retention_trend <- agency_data %>%
  group_by(Region, Month, Year) %>%
  summarise(
    Avg_Retention = mean(Retention_Rate),
    .groups = 'drop'
  ) %>%
  mutate(
    Month_Label = paste0("Month ", Month)
  )

# Create interactive line chart with Plotly
p7 <- plot_ly(data = retention_trend, 
              x = ~Month, 
              y = ~Avg_Retention,
              color = ~Region,
              type = 'scatter',
              mode = 'lines+markers',
              line = list(width = 3),
              marker = list(size = 8),
              hovertemplate = '<b>%{fullData.name}</b><br>Month: %{x}<br>Retention: %{y:.2%}<extra></extra>') %>%
  add_trace(x = ~Month, y = ~Avg_Retention) %>%
  layout(
    title = list(text = "Customer Retention Rate Trends by Region (FY2023-2024)"),
    xaxis = list(title = "Month (1-12)"),
    yaxis = list(title = "Retention Rate", tickformat = ".0%"),
    hovermode = "x unified",
    showlegend = TRUE,
    plot_bgcolor = "rgba(240,240,240,0.5)",
    paper_bgcolor = "white"
  )

p7

Visualization 8: Multi-Metric Dashboard Summary (Interactive)

Purpose: Comprehensive overview of all key metrics by branch.

# Create summary by branch
branch_summary <- agency_data %>%
  group_by(Branch, Region) %>%
  summarise(
    Total_Premium = sum(Premium_Generated),
    Policies_Sold = sum(Policies_Sold),
    Avg_Satisfaction = mean(Customer_Satisfaction),
    Avg_Retention = mean(Retention_Rate),
    Agent_Count = n_distinct(Agent_ID),
    .groups = 'drop'
  ) %>%
  mutate(Premium_Per_Agent = Total_Premium / Agent_Count)

# Prepare data for sunburst chart
sunburst_data <- agency_data %>%
  group_by(Region, Branch) %>%
  summarise(
    Premium = sum(Premium_Generated),
    .groups = 'drop'
  ) %>%
  mutate(Parent = Region, ID = Branch) %>%
  bind_rows(
    data.frame(Region = unique(agency_data$Region), Parent = "", 
               Branch = unique(agency_data$Region), Premium = NA, ID = unique(agency_data$Region))
  ) %>%
  bind_rows(
    data.frame(Region = "All Branches", Parent = "", Branch = "All Branches", 
               Premium = NA, ID = "All Branches")
  ) %>%
  select(labels = ID, parents = Parent, values = Premium)

# Create sunburst
p8 <- plot_ly(
  labels = sunburst_data$labels,
  parents = sunburst_data$parents,
  values = sunburst_data$values,
  type = "sunburst",
  marker = list(
    colorscale = "Viridis",
    cmid = mean(sunburst_data$values, na.rm = TRUE)
  ),
  hovertemplate = "<b>%{label}</b><br>Premium: ₹%{value:,.0f}<extra></extra>"
) %>%
  layout(
    title = list(text = "Interactive Hierarchy: Premium by Region → Branch")
  )

p8

Key Findings & Recommendations

Performance Insights:

  • Top Performers: Agents in Metro regions (Kolkata North, Kolkata South) demonstrate 25-35% higher conversion rates
  • Retention Opportunity: Tier-2 and Rural regions show 8-12% lower retention; targeted intervention recommended
  • Efficiency Correlation: Strong linear relationship between policy volume and premium generation (r > 0.85)

Next Steps:

  1. Investigate retention drivers in underperforming regions
  2. Replicate Metro region best practices to Tier-2/Rural branches
  3. Implement performance-based incentive structure for bottom 10% agents

Report Generated: May 23, 2026