📌 Project Setup and Sampling

dataset <- readRDS("dataset.rds")
sample_dataset <- dataset |>
  group_by(region) |>
  sample_frac(0.2) |>
  ungroup()
filtered_dataset <- sample_dataset |>
  filter(As_of_year == 2024)
str(filtered_dataset)
## tibble [1,562 × 43] (S3: tbl_df/tbl/data.frame)
##  $ Identifier                  : int [1:1562] 421 8569 12616 2961 8457 11149 942 325 8454 11832 ...
##  $ Branch_code                 : int [1:1562] 408 408 408 408 509 93 118 95 96 94 ...
##  $ Branch_name                 : chr [1:1562] "Wao" "Wao" "Wao" "Wao" ...
##  $ Department                  : chr [1:1562] "Operations" "Operations" "Operations" "Operations" ...
##  $ No_of_credit_officer        : int [1:1562] 3 3 3 3 3 3 3 4 3 3 ...
##  $ Total_member                : int [1:1562] 1077 1116 1096 1113 1004 1031 768 1182 1036 948 ...
##  $ Deposit_balance             : num [1:1562] 2266761 2509169 2454813 2467002 6242344 ...
##  $ Total_borrower              : int [1:1562] 828 824 828 823 947 870 690 1095 971 850 ...
##  $ Loan_amount                 : num [1:1562] 17347000 17433000 17033000 17607000 22519000 ...
##  $ PAR_1_30_amount             : num [1:1562] 165907 62523 1432518 193570 28571 ...
##  $ PAR_1_30_borrower           : int [1:1562] 23 8 110 16 2 16 1 2 0 2 ...
##  $ PAR_31_60_amount            : num [1:1562] 10567 142991 26320 141225 3890 ...
##  $ PAR_31_60_borrower          : int [1:1562] 4 9 4 11 1 7 0 3 0 5 ...
##  $ PAR_61_90_amount            : num [1:1562] 7709 79842 79773 57874 28924 ...
##  $ PAR_61_90_borrower          : int [1:1562] 2 8 5 6 2 4 1 9 0 2 ...
##  $ PAR_91_180_amount           : num [1:1562] 231719 103840 96288 97037 17621 ...
##  $ PAR_91_180_borrower         : int [1:1562] 21 13 12 13 2 4 4 8 2 34 ...
##  $ PAR_181_365_amount          : num [1:1562] 100165 214497 254164 187073 54649 ...
##  $ PAR_181_365_borrower        : int [1:1562] 19 30 26 26 6 15 3 5 6 13 ...
##  $ PAR_above_365_amount        : num [1:1562] 1814114 1862816 2031510 1853237 804509 ...
##  $ PAR_above_365_borrower      : int [1:1562] 283 289 309 287 139 231 38 180 71 147 ...
##  $ Portfolio_at_risk           : num [1:1562] 2330181 2466509 3920575 2530016 938165 ...
##  $ Past_due_ratio              : num [1:1562] 0.26 0.29 0.3 0.26 0.08 0.13 0.04 0.1 0.04 0.11 ...
##  $ Portfolio_at_risk_par_or_olb: num [1:1562] 2.90e-01 3.10e-01 4.70e-01 2.53e+06 8.00e-02 ...
##  $ Monthly_income              : num [1:1562] 307033 271218 165958 350461 499742 ...
##  $ As_of_month                 : chr [1:1562] "Jan" "Jun" "Dec" "May" ...
##  $ As_of_year                  : chr [1:1562] "2024" "2024" "2024" "2024" ...
##  $ CreatedAt                   : POSIXct[1:1562], format: "2024-05-11 20:46:08" "2024-07-11 12:23:00" ...
##  $ UpdatedAt                   : POSIXct[1:1562], format: "2024-05-11 20:46:08" "2024-07-11 12:23:00" ...
##  $ Division                    : chr [1:1562] "Division 5" "Division 5" "Division 5" "Division 5" ...
##  $ District                    : chr [1:1562] "District 13" "District 13" "District 13" "District 13" ...
##  $ Area                        : chr [1:1562] "Area 067" "Area 067" "Area 067" "Area 067" ...
##  $ Branch_opening_date         : Date[1:1562], format: "2018-04-01" "2018-04-01" ...
##  $ status                      : chr [1:1562] "TRUE" "TRUE" "TRUE" "TRUE" ...
##  $ street                      : chr [1:1562] NA NA NA NA ...
##  $ barangay                    : chr [1:1562] "Extension" "Extension" "Extension" "Extension" ...
##  $ City_municipality           : chr [1:1562] "Wao" "Wao" "Wao" "Wao" ...
##  $ province                    : chr [1:1562] "Lanao Del Sur" "Lanao Del Sur" "Lanao Del Sur" "Lanao Del Sur" ...
##  $ region                      : chr [1:1562] "BARMM" "BARMM" "BARMM" "BARMM" ...
##  $ postal_code                 : chr [1:1562] "8703" "8703" "8703" "8703" ...
##  $ Latitude                    : num [1:1562] 7.64 7.64 7.64 7.64 14.22 ...
##  $ Longitude                   : num [1:1562] 125 125 125 125 121 ...
##  $ As_Of_Date                  : Date[1:1562], format: "2024-01-01" "2024-06-01" ...

📌 Membership Growth & Distribution

What is the trend in new member enrollment month-by-month in 2024?

if ("New_Members" %in% names(filtered_dataset)) {
  membership_trend <- filtered_dataset |>
    group_by(As_of_month) |>
    summarise(New_Members_Total = sum(New_Members, na.rm = TRUE))
} else {
  membership_trend <- filtered_dataset |>
    group_by(As_of_month) |>
    summarise(New_Members_Total = sum(Total_member, na.rm = TRUE))
}

DT::datatable(membership_trend)
ggplot(membership_trend, aes(x = As_of_month, y = New_Members_Total)) +
  geom_line(color = "steelblue", size = 1.2) +
  geom_point(size = 2) +
  labs(title = "Monthly New Member Enrollment (2024)", x = "Month", y = "New Members") +
  theme_minimal() +
  scale_y_continuous(labels = comma)

### Which branches experienced the highest growth in members?

if ("New_Members" %in% names(filtered_dataset)) {
  member_growth <- filtered_dataset |>
    group_by(Branch_name) |>
    summarise(Member_Growth = sum(New_Members, na.rm = TRUE)) |>
    arrange(desc(Member_Growth)) |>
    slice_head(n = 10)
} else {
  member_growth <- filtered_dataset |>
    group_by(Branch_name) |>
    summarise(Member_Growth = sum(Total_member, na.rm = TRUE)) |>
    arrange(desc(Member_Growth)) |>
    slice_head(n = 10)
}

DT::datatable(member_growth)
members <- ggplot(member_growth, aes(x = reorder(Branch_name, Member_Growth), y = Member_Growth)) +
  geom_col(fill = "coral") +
  coord_flip() +
  labs(title = "Top 10 Branches by Member Growth", x = "Branch", y = "Member Growth") +
  theme_minimal() +
  scale_y_continuous(labels = comma)

ggplotly(members)

📌 Loan Performance & Risk

Which branches or regions have the highest total loan disbursements in 2024?

loan_region <- filtered_dataset |>
  group_by(region) |>
  summarise(Total_Loans = sum(Loan_amount, na.rm = TRUE)) |>
  arrange(desc(Total_Loans))

DT::datatable(loan_region)
ggplot(loan_region, aes(x = reorder(region, Total_Loans), y = Total_Loans)) +
  geom_col(fill = "orange") +
  coord_flip() +
  labs(title = "Total Loan Disbursements by Region", x = "Region", y = "Total Loan") +
  theme_minimal() +
  scale_y_continuous(labels = dollar)

What is the Portfolio at Risk (PAR) ratio per branch, and which branches are most at risk?

par_ratio <- filtered_dataset |>
  group_by(Branch_name) |>
  summarise(Total_Loan = sum(Loan_amount, na.rm = TRUE),
            Total_PAR = sum(Portfolio_at_risk, na.rm = TRUE),
            PAR_Ratio = ifelse(Total_Loan > 0, Total_PAR / Total_Loan, NA)) |>
  arrange(desc(PAR_Ratio)) |>
  slice_head(n = 10)

DT::datatable(par_ratio)
ggplot(par_ratio, aes(x = reorder(Branch_name, PAR_Ratio), y = PAR_Ratio)) +
  geom_col(fill = "red") +
  coord_flip() +
  labs(title = "Top 10 Branches by PAR Ratio", x = "Branch", y = "PAR Ratio") +
  theme_minimal() +
  scale_y_continuous(labels = percent)

How does the average loan amount per borrower differ across regions?

avg_loan <- filtered_dataset |>
  group_by(region) |>
  summarise(Avg_Loan_Per_Borrower = mean(ifelse(Total_borrower > 0, Loan_amount / Total_borrower, NA), na.rm = TRUE)) |>
  arrange(desc(Avg_Loan_Per_Borrower))

DT::datatable(avg_loan)
ggplot(avg_loan, aes(x = reorder(region, Avg_Loan_Per_Borrower), y = Avg_Loan_Per_Borrower)) +
  geom_col(fill = "darkgreen") +
  coord_flip() +
  labs(title = "Average Loan per Borrower by Region", x = "Region", y = "Avg Loan per Borrower") +
  theme_minimal() +
  scale_y_continuous(labels = dollar)

📌 Borrower Insights

What percentage of members are active borrowers in each branch?

borrower_percentage <- filtered_dataset |>
  group_by(Branch_name) |>
  summarise(Total_Members = sum(Total_member, na.rm = TRUE),
            Active_Borrowers = sum(Total_borrower, na.rm = TRUE),
            Borrower_Percentage = ifelse(Total_Members > 0, Active_Borrowers / Total_Members * 100, NA)) |>
  arrange(desc(Borrower_Percentage)) |>
  slice_head(n = 10)

DT::datatable(borrower_percentage)
ggplot(borrower_percentage, aes(x = reorder(Branch_name, Borrower_Percentage), y = Borrower_Percentage)) +
  geom_col(fill = "purple") +
  coord_flip() +
  labs(title = "Top 10 Branches by Borrower Percentage", x = "Branch", y = "Borrower %") +
  theme_minimal()

Which branches have the highest number of borrowers?

top_borrowers <- filtered_dataset |>
  group_by(Branch_name) |>
  summarise(Total_Borrowers = sum(Total_borrower, na.rm = TRUE)) |>
  arrange(desc(Total_Borrowers)) |>
  slice_head(n = 10)

DT::datatable(top_borrowers)
ggplot(top_borrowers, aes(x = reorder(Branch_name, Total_Borrowers), y = Total_Borrowers)) +
  geom_col(fill = "purple") +
  coord_flip() +
  labs(title = "Top 10 Branches by Borrower Count", x = "Branch", y = "Borrower Count") +
  theme_minimal()

📌 Income Analysis

Which branches generate the highest average monthly income?

top_income <- filtered_dataset |>
  group_by(Branch_name) |>
  summarise(Avg_Monthly_Income = mean(Monthly_income, na.rm = TRUE)) |>
  arrange(desc(Avg_Monthly_Income)) |>
  slice_head(n = 10)

DT::datatable(top_income)
ggplot(top_income, aes(x = reorder(Branch_name, Avg_Monthly_Income), y = Avg_Monthly_Income)) +
  geom_col(fill = "#4CAF50") +
  coord_flip() +
  labs(title = "Top 10 Branches by Avg Monthly Income", x = "Branch", y = "Avg Monthly Income") +
  theme_minimal() +
  scale_y_continuous(labels = dollar)

How does average income correlate with loan amount or PAR?

income_corr <- filtered_dataset |>
  group_by(Branch_name) |>
  summarise(Avg_Income = mean(Monthly_income, na.rm = TRUE),
            Total_Loan = sum(Loan_amount, na.rm = TRUE),
            Total_PAR = sum(Portfolio_at_risk, na.rm = TRUE))

DT::datatable(income_corr)
ggplot(income_corr, aes(x = Total_Loan, y = Avg_Income, color = Total_PAR)) +
  geom_point(size = 3) +
  labs(title = "Income vs Loan vs PAR by Branch", x = "Total Loan", y = "Avg Income", color = "Total PAR") +
  theme_minimal()

Which region has the most consistent monthly income across the year?

income_variability <- filtered_dataset |>
  group_by(region, As_of_month) |>
  summarise(Monthly_Income = mean(Monthly_income, na.rm = TRUE)) |>
  group_by(region) |>
  summarise(Income_SD = sd(Monthly_Income)) |>
  arrange(Income_SD)

DT::datatable(income_variability)
ggplot(income_variability, aes(x = reorder(region, Income_SD), y = Income_SD)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(title = "Income Consistency by Region (Lower SD = More Stable)", x = "Region", y = "Income Standard Deviation") +
  theme_minimal()

📌 Deposit & Liquidity Health

Which branches have the highest total deposit balance?

top_deposits <- filtered_dataset |>
  group_by(Branch_name) |>
  summarise(Total_Deposit = sum(Deposit_balance, na.rm = TRUE)) |>
  arrange(desc(Total_Deposit)) |>
  slice_head(n = 10)

DT::datatable(top_deposits)
ggplot(top_deposits, aes(x = reorder(Branch_name, Total_Deposit), y = Total_Deposit)) +
  geom_col(fill = "dodgerblue") +
  coord_flip() +
  labs(title = "Top 10 Branches by Total Deposit", x = "Branch", y = "Total Deposit") +
  theme_minimal() +
  scale_y_continuous(labels = dollar)

What is the average deposit balance per member in each branch?

avg_deposit <- filtered_dataset |>
  group_by(Branch_name) |>
  summarise(Total_Deposit = sum(Deposit_balance, na.rm = TRUE),
            Total_Members = sum(Total_member, na.rm = TRUE),
            Avg_Deposit_Per_Member = ifelse(Total_Members > 0, Total_Deposit / Total_Members, NA)) |>
  arrange(desc(Avg_Deposit_Per_Member)) |>
  slice_head(n = 10)

DT::datatable(avg_deposit)
ggplot(avg_deposit, aes(x = reorder(Branch_name, Avg_Deposit_Per_Member), y = Avg_Deposit_Per_Member)) +
  geom_col(fill = "darkblue") +
  coord_flip() +
  labs(title = "Top 10 Branches by Avg Deposit per Member", x = "Branch", y = "Avg Deposit per Member") +
  theme_minimal() +
  scale_y_continuous(labels = dollar)

Is there a correlation between deposit balance and income per branch?

deposit_income_corr <- avg_deposit |>
  left_join(top_income, by = "Branch_name")

DT::datatable(deposit_income_corr)
ggplot(deposit_income_corr, aes(x = Avg_Deposit_Per_Member, y = Avg_Monthly_Income)) +
  geom_point(color = "darkred", size = 3) +
  labs(title = "Deposit vs Income Correlation by Branch", x = "Avg Deposit per Member", y = "Avg Monthly Income") +
  theme_minimal()

📌 Branch-Level Performance Comparisons

Which branches rank highest in combined performance?

branch_performance <- filtered_dataset |>
  group_by(Branch_name) |>
  summarise(Total_Members = sum(Total_member, na.rm = TRUE),
            Total_Borrowers = sum(Total_borrower, na.rm = TRUE),
            Total_Income = sum(Monthly_income, na.rm = TRUE),
            Total_Deposit = sum(Deposit_balance, na.rm = TRUE),
            Total_Loan = sum(Loan_amount, na.rm = TRUE),
            Total_PAR = sum(Portfolio_at_risk, na.rm = TRUE)) |>
  mutate(Composite_Score = Total_Members + Total_Borrowers + Total_Income + Total_Deposit - Total_PAR) |>
  arrange(desc(Composite_Score)) |>
  slice_head(n = 10)

DT::datatable(branch_performance)
ggplot(branch_performance, aes(x = reorder(Branch_name, Composite_Score), y = Composite_Score)) +
  geom_col(fill = "gold") +
  coord_flip() +
  labs(title = "Top 10 Branches by Composite Performance Score", x = "Branch", y = "Composite Score") +
  theme_minimal()

Are there any underperforming branches with high risk and low return?

underperforming <- branch_performance |>
  filter(Total_PAR > quantile(Total_PAR, 0.75, na.rm = TRUE),
         Total_Income < quantile(Total_Income, 0.25, na.rm = TRUE))

DT::datatable(underperforming)
ggplot(underperforming, aes(x = reorder(Branch_name, Total_PAR), y = Total_PAR)) +
  geom_col(fill = "firebrick") +
  coord_flip() +
  labs(title = "Underperforming Branches (High PAR, Low Income)", x = "Branch", y = "Total PAR") +
  theme_minimal()