dataset <- readRDS("dataset.rds")
sample_dataset <- dataset |>
group_by(region) |>
sample_frac(0.2) |>
ungroup()
filtered_dataset <- sample_dataset |>
filter(As_of_year == 2024)
str(filtered_dataset)
## tibble [1,562 × 43] (S3: tbl_df/tbl/data.frame)
## $ Identifier : int [1:1562] 421 8569 12616 2961 8457 11149 942 325 8454 11832 ...
## $ Branch_code : int [1:1562] 408 408 408 408 509 93 118 95 96 94 ...
## $ Branch_name : chr [1:1562] "Wao" "Wao" "Wao" "Wao" ...
## $ Department : chr [1:1562] "Operations" "Operations" "Operations" "Operations" ...
## $ No_of_credit_officer : int [1:1562] 3 3 3 3 3 3 3 4 3 3 ...
## $ Total_member : int [1:1562] 1077 1116 1096 1113 1004 1031 768 1182 1036 948 ...
## $ Deposit_balance : num [1:1562] 2266761 2509169 2454813 2467002 6242344 ...
## $ Total_borrower : int [1:1562] 828 824 828 823 947 870 690 1095 971 850 ...
## $ Loan_amount : num [1:1562] 17347000 17433000 17033000 17607000 22519000 ...
## $ PAR_1_30_amount : num [1:1562] 165907 62523 1432518 193570 28571 ...
## $ PAR_1_30_borrower : int [1:1562] 23 8 110 16 2 16 1 2 0 2 ...
## $ PAR_31_60_amount : num [1:1562] 10567 142991 26320 141225 3890 ...
## $ PAR_31_60_borrower : int [1:1562] 4 9 4 11 1 7 0 3 0 5 ...
## $ PAR_61_90_amount : num [1:1562] 7709 79842 79773 57874 28924 ...
## $ PAR_61_90_borrower : int [1:1562] 2 8 5 6 2 4 1 9 0 2 ...
## $ PAR_91_180_amount : num [1:1562] 231719 103840 96288 97037 17621 ...
## $ PAR_91_180_borrower : int [1:1562] 21 13 12 13 2 4 4 8 2 34 ...
## $ PAR_181_365_amount : num [1:1562] 100165 214497 254164 187073 54649 ...
## $ PAR_181_365_borrower : int [1:1562] 19 30 26 26 6 15 3 5 6 13 ...
## $ PAR_above_365_amount : num [1:1562] 1814114 1862816 2031510 1853237 804509 ...
## $ PAR_above_365_borrower : int [1:1562] 283 289 309 287 139 231 38 180 71 147 ...
## $ Portfolio_at_risk : num [1:1562] 2330181 2466509 3920575 2530016 938165 ...
## $ Past_due_ratio : num [1:1562] 0.26 0.29 0.3 0.26 0.08 0.13 0.04 0.1 0.04 0.11 ...
## $ Portfolio_at_risk_par_or_olb: num [1:1562] 2.90e-01 3.10e-01 4.70e-01 2.53e+06 8.00e-02 ...
## $ Monthly_income : num [1:1562] 307033 271218 165958 350461 499742 ...
## $ As_of_month : chr [1:1562] "Jan" "Jun" "Dec" "May" ...
## $ As_of_year : chr [1:1562] "2024" "2024" "2024" "2024" ...
## $ CreatedAt : POSIXct[1:1562], format: "2024-05-11 20:46:08" "2024-07-11 12:23:00" ...
## $ UpdatedAt : POSIXct[1:1562], format: "2024-05-11 20:46:08" "2024-07-11 12:23:00" ...
## $ Division : chr [1:1562] "Division 5" "Division 5" "Division 5" "Division 5" ...
## $ District : chr [1:1562] "District 13" "District 13" "District 13" "District 13" ...
## $ Area : chr [1:1562] "Area 067" "Area 067" "Area 067" "Area 067" ...
## $ Branch_opening_date : Date[1:1562], format: "2018-04-01" "2018-04-01" ...
## $ status : chr [1:1562] "TRUE" "TRUE" "TRUE" "TRUE" ...
## $ street : chr [1:1562] NA NA NA NA ...
## $ barangay : chr [1:1562] "Extension" "Extension" "Extension" "Extension" ...
## $ City_municipality : chr [1:1562] "Wao" "Wao" "Wao" "Wao" ...
## $ province : chr [1:1562] "Lanao Del Sur" "Lanao Del Sur" "Lanao Del Sur" "Lanao Del Sur" ...
## $ region : chr [1:1562] "BARMM" "BARMM" "BARMM" "BARMM" ...
## $ postal_code : chr [1:1562] "8703" "8703" "8703" "8703" ...
## $ Latitude : num [1:1562] 7.64 7.64 7.64 7.64 14.22 ...
## $ Longitude : num [1:1562] 125 125 125 125 121 ...
## $ As_Of_Date : Date[1:1562], format: "2024-01-01" "2024-06-01" ...
if ("New_Members" %in% names(filtered_dataset)) {
membership_trend <- filtered_dataset |>
group_by(As_of_month) |>
summarise(New_Members_Total = sum(New_Members, na.rm = TRUE))
} else {
membership_trend <- filtered_dataset |>
group_by(As_of_month) |>
summarise(New_Members_Total = sum(Total_member, na.rm = TRUE))
}
DT::datatable(membership_trend)
ggplot(membership_trend, aes(x = As_of_month, y = New_Members_Total)) +
geom_line(color = "steelblue", size = 1.2) +
geom_point(size = 2) +
labs(title = "Monthly New Member Enrollment (2024)", x = "Month", y = "New Members") +
theme_minimal() +
scale_y_continuous(labels = comma)
### Which branches experienced the highest growth in members?
if ("New_Members" %in% names(filtered_dataset)) {
member_growth <- filtered_dataset |>
group_by(Branch_name) |>
summarise(Member_Growth = sum(New_Members, na.rm = TRUE)) |>
arrange(desc(Member_Growth)) |>
slice_head(n = 10)
} else {
member_growth <- filtered_dataset |>
group_by(Branch_name) |>
summarise(Member_Growth = sum(Total_member, na.rm = TRUE)) |>
arrange(desc(Member_Growth)) |>
slice_head(n = 10)
}
DT::datatable(member_growth)
members <- ggplot(member_growth, aes(x = reorder(Branch_name, Member_Growth), y = Member_Growth)) +
geom_col(fill = "coral") +
coord_flip() +
labs(title = "Top 10 Branches by Member Growth", x = "Branch", y = "Member Growth") +
theme_minimal() +
scale_y_continuous(labels = comma)
ggplotly(members)
loan_region <- filtered_dataset |>
group_by(region) |>
summarise(Total_Loans = sum(Loan_amount, na.rm = TRUE)) |>
arrange(desc(Total_Loans))
DT::datatable(loan_region)
ggplot(loan_region, aes(x = reorder(region, Total_Loans), y = Total_Loans)) +
geom_col(fill = "orange") +
coord_flip() +
labs(title = "Total Loan Disbursements by Region", x = "Region", y = "Total Loan") +
theme_minimal() +
scale_y_continuous(labels = dollar)
par_ratio <- filtered_dataset |>
group_by(Branch_name) |>
summarise(Total_Loan = sum(Loan_amount, na.rm = TRUE),
Total_PAR = sum(Portfolio_at_risk, na.rm = TRUE),
PAR_Ratio = ifelse(Total_Loan > 0, Total_PAR / Total_Loan, NA)) |>
arrange(desc(PAR_Ratio)) |>
slice_head(n = 10)
DT::datatable(par_ratio)
ggplot(par_ratio, aes(x = reorder(Branch_name, PAR_Ratio), y = PAR_Ratio)) +
geom_col(fill = "red") +
coord_flip() +
labs(title = "Top 10 Branches by PAR Ratio", x = "Branch", y = "PAR Ratio") +
theme_minimal() +
scale_y_continuous(labels = percent)
avg_loan <- filtered_dataset |>
group_by(region) |>
summarise(Avg_Loan_Per_Borrower = mean(ifelse(Total_borrower > 0, Loan_amount / Total_borrower, NA), na.rm = TRUE)) |>
arrange(desc(Avg_Loan_Per_Borrower))
DT::datatable(avg_loan)
ggplot(avg_loan, aes(x = reorder(region, Avg_Loan_Per_Borrower), y = Avg_Loan_Per_Borrower)) +
geom_col(fill = "darkgreen") +
coord_flip() +
labs(title = "Average Loan per Borrower by Region", x = "Region", y = "Avg Loan per Borrower") +
theme_minimal() +
scale_y_continuous(labels = dollar)
borrower_percentage <- filtered_dataset |>
group_by(Branch_name) |>
summarise(Total_Members = sum(Total_member, na.rm = TRUE),
Active_Borrowers = sum(Total_borrower, na.rm = TRUE),
Borrower_Percentage = ifelse(Total_Members > 0, Active_Borrowers / Total_Members * 100, NA)) |>
arrange(desc(Borrower_Percentage)) |>
slice_head(n = 10)
DT::datatable(borrower_percentage)
ggplot(borrower_percentage, aes(x = reorder(Branch_name, Borrower_Percentage), y = Borrower_Percentage)) +
geom_col(fill = "purple") +
coord_flip() +
labs(title = "Top 10 Branches by Borrower Percentage", x = "Branch", y = "Borrower %") +
theme_minimal()
top_borrowers <- filtered_dataset |>
group_by(Branch_name) |>
summarise(Total_Borrowers = sum(Total_borrower, na.rm = TRUE)) |>
arrange(desc(Total_Borrowers)) |>
slice_head(n = 10)
DT::datatable(top_borrowers)
ggplot(top_borrowers, aes(x = reorder(Branch_name, Total_Borrowers), y = Total_Borrowers)) +
geom_col(fill = "purple") +
coord_flip() +
labs(title = "Top 10 Branches by Borrower Count", x = "Branch", y = "Borrower Count") +
theme_minimal()
top_income <- filtered_dataset |>
group_by(Branch_name) |>
summarise(Avg_Monthly_Income = mean(Monthly_income, na.rm = TRUE)) |>
arrange(desc(Avg_Monthly_Income)) |>
slice_head(n = 10)
DT::datatable(top_income)
ggplot(top_income, aes(x = reorder(Branch_name, Avg_Monthly_Income), y = Avg_Monthly_Income)) +
geom_col(fill = "#4CAF50") +
coord_flip() +
labs(title = "Top 10 Branches by Avg Monthly Income", x = "Branch", y = "Avg Monthly Income") +
theme_minimal() +
scale_y_continuous(labels = dollar)
income_corr <- filtered_dataset |>
group_by(Branch_name) |>
summarise(Avg_Income = mean(Monthly_income, na.rm = TRUE),
Total_Loan = sum(Loan_amount, na.rm = TRUE),
Total_PAR = sum(Portfolio_at_risk, na.rm = TRUE))
DT::datatable(income_corr)
ggplot(income_corr, aes(x = Total_Loan, y = Avg_Income, color = Total_PAR)) +
geom_point(size = 3) +
labs(title = "Income vs Loan vs PAR by Branch", x = "Total Loan", y = "Avg Income", color = "Total PAR") +
theme_minimal()
income_variability <- filtered_dataset |>
group_by(region, As_of_month) |>
summarise(Monthly_Income = mean(Monthly_income, na.rm = TRUE)) |>
group_by(region) |>
summarise(Income_SD = sd(Monthly_Income)) |>
arrange(Income_SD)
DT::datatable(income_variability)
ggplot(income_variability, aes(x = reorder(region, Income_SD), y = Income_SD)) +
geom_col(fill = "steelblue") +
coord_flip() +
labs(title = "Income Consistency by Region (Lower SD = More Stable)", x = "Region", y = "Income Standard Deviation") +
theme_minimal()
top_deposits <- filtered_dataset |>
group_by(Branch_name) |>
summarise(Total_Deposit = sum(Deposit_balance, na.rm = TRUE)) |>
arrange(desc(Total_Deposit)) |>
slice_head(n = 10)
DT::datatable(top_deposits)
ggplot(top_deposits, aes(x = reorder(Branch_name, Total_Deposit), y = Total_Deposit)) +
geom_col(fill = "dodgerblue") +
coord_flip() +
labs(title = "Top 10 Branches by Total Deposit", x = "Branch", y = "Total Deposit") +
theme_minimal() +
scale_y_continuous(labels = dollar)
avg_deposit <- filtered_dataset |>
group_by(Branch_name) |>
summarise(Total_Deposit = sum(Deposit_balance, na.rm = TRUE),
Total_Members = sum(Total_member, na.rm = TRUE),
Avg_Deposit_Per_Member = ifelse(Total_Members > 0, Total_Deposit / Total_Members, NA)) |>
arrange(desc(Avg_Deposit_Per_Member)) |>
slice_head(n = 10)
DT::datatable(avg_deposit)
ggplot(avg_deposit, aes(x = reorder(Branch_name, Avg_Deposit_Per_Member), y = Avg_Deposit_Per_Member)) +
geom_col(fill = "darkblue") +
coord_flip() +
labs(title = "Top 10 Branches by Avg Deposit per Member", x = "Branch", y = "Avg Deposit per Member") +
theme_minimal() +
scale_y_continuous(labels = dollar)
deposit_income_corr <- avg_deposit |>
left_join(top_income, by = "Branch_name")
DT::datatable(deposit_income_corr)
ggplot(deposit_income_corr, aes(x = Avg_Deposit_Per_Member, y = Avg_Monthly_Income)) +
geom_point(color = "darkred", size = 3) +
labs(title = "Deposit vs Income Correlation by Branch", x = "Avg Deposit per Member", y = "Avg Monthly Income") +
theme_minimal()
branch_performance <- filtered_dataset |>
group_by(Branch_name) |>
summarise(Total_Members = sum(Total_member, na.rm = TRUE),
Total_Borrowers = sum(Total_borrower, na.rm = TRUE),
Total_Income = sum(Monthly_income, na.rm = TRUE),
Total_Deposit = sum(Deposit_balance, na.rm = TRUE),
Total_Loan = sum(Loan_amount, na.rm = TRUE),
Total_PAR = sum(Portfolio_at_risk, na.rm = TRUE)) |>
mutate(Composite_Score = Total_Members + Total_Borrowers + Total_Income + Total_Deposit - Total_PAR) |>
arrange(desc(Composite_Score)) |>
slice_head(n = 10)
DT::datatable(branch_performance)
ggplot(branch_performance, aes(x = reorder(Branch_name, Composite_Score), y = Composite_Score)) +
geom_col(fill = "gold") +
coord_flip() +
labs(title = "Top 10 Branches by Composite Performance Score", x = "Branch", y = "Composite Score") +
theme_minimal()
underperforming <- branch_performance |>
filter(Total_PAR > quantile(Total_PAR, 0.75, na.rm = TRUE),
Total_Income < quantile(Total_Income, 0.25, na.rm = TRUE))
DT::datatable(underperforming)
ggplot(underperforming, aes(x = reorder(Branch_name, Total_PAR), y = Total_PAR)) +
geom_col(fill = "firebrick") +
coord_flip() +
labs(title = "Underperforming Branches (High PAR, Low Income)", x = "Branch", y = "Total PAR") +
theme_minimal()