# Load the dataset
data <- readRDS("dataset.rds")
# Apply stratified sampling by region (20%)
set.seed(123)
sampled_data <- data |>
group_by(region) |>
sample_frac(0.2) |>
ungroup()
# Focus on 2024 data only
current_data <- sampled_data |>
filter(As_of_year == 2024)
# Check structure
str(current_data)
## tibble [1,606 × 43] (S3: tbl_df/tbl/data.frame)
## $ Identifier : int [1:1606] 8569 11937 10580 2313 9789 11144 939 2851 2852 11821 ...
## $ Branch_code : int [1:1606] 408 408 408 408 117 118 120 117 118 509 ...
## $ Branch_name : chr [1:1606] "Wao" "Wao" "Wao" " Wao" ...
## $ Department : chr [1:1606] "Operations" "Operations" "Operations" "Operations" ...
## $ No_of_credit_officer : int [1:1606] 3 3 3 3 3 3 4 3 3 3 ...
## $ Total_member : int [1:1606] 1116 1093 1111 1094 1167 776 1220 1078 781 979 ...
## $ Deposit_balance : num [1:1606] 2509169 2421737 2460582 2318548 5558780 ...
## $ Total_borrower : int [1:1606] 824 814 841 805 1002 683 1136 954 693 887 ...
## $ Loan_amount : num [1:1606] 17433000 16657000 17302000 17326000 22410000 ...
## $ PAR_1_30_amount : num [1:1606] 62523 29296 107746 313516 15785 ...
## $ PAR_1_30_borrower : int [1:1606] 8 4 9 19 4 3 1 2 0 5 ...
## $ PAR_31_60_amount : num [1:1606] 142991 81428 9953 62250 21525 ...
## $ PAR_31_60_borrower : int [1:1606] 9 5 2 9 3 1 1 0 2 5 ...
## $ PAR_61_90_amount : num [1:1606] 79842 35480 65434 47078 19006 ...
## $ PAR_61_90_borrower : int [1:1606] 8 4 6 7 1 4 1 2 0 3 ...
## $ PAR_91_180_amount : num [1:1606] 103840 124699 185272 55671 27528 ...
## $ PAR_91_180_borrower : int [1:1606] 13 14 15 8 2 1 3 3 0 5 ...
## $ PAR_181_365_amount : num [1:1606] 214497 231763 219936 246751 13914 ...
## $ PAR_181_365_borrower : int [1:1606] 30 25 24 32 5 2 2 10 5 4 ...
## $ PAR_above_365_amount : num [1:1606] 1862816 1995157 1914082 1845631 930160 ...
## $ PAR_above_365_borrower : int [1:1606] 289 305 301 284 181 40 57 180 37 143 ...
## $ Portfolio_at_risk : num [1:1606] 2466509 2497822 2502422 2570897 1027918 ...
## $ Past_due_ratio : num [1:1606] 0.29 0.32 0.29 0.26 0.09 0.03 0.02 0.09 0.03 0.07 ...
## $ Portfolio_at_risk_par_or_olb: num [1:1606] 0.31 0.33 0.31 0.29 0.09 ...
## $ Monthly_income : num [1:1606] 271218 265508 304811 306319 509281 ...
## $ As_of_month : chr [1:1606] "Jun" "Nov" "Sep" "Apr" ...
## $ As_of_year : chr [1:1606] "2024" "2024" "2024" "2024" ...
## $ CreatedAt : POSIXct[1:1606], format: "2024-07-11 12:23:00" "2024-12-10 17:42:00" ...
## $ UpdatedAt : POSIXct[1:1606], format: "2024-07-11 12:23:00" "2024-12-10 17:42:00" ...
## $ Division : chr [1:1606] "Division 5" "Division 5" "Division 5" "Division 5" ...
## $ District : chr [1:1606] "District 13" "District 13" "District 13" "District 13" ...
## $ Area : chr [1:1606] "Area 067" "Area 067" "Area 067" "Area 067" ...
## $ Branch_opening_date : Date[1:1606], format: "2018-04-01" "2018-04-01" ...
## $ status : chr [1:1606] "TRUE" "TRUE" "TRUE" "TRUE" ...
## $ street : chr [1:1606] NA NA NA NA ...
## $ barangay : chr [1:1606] "Extension" "Extension" "Extension" "Extension" ...
## $ City_municipality : chr [1:1606] "Wao" "Wao" "Wao" "Wao" ...
## $ province : chr [1:1606] "Lanao Del Sur" "Lanao Del Sur" "Lanao Del Sur" "Lanao Del Sur" ...
## $ region : chr [1:1606] "BARMM" "BARMM" "BARMM" "BARMM" ...
## $ postal_code : chr [1:1606] "8703" "8703" "8703" "8703" ...
## $ Latitude : num [1:1606] 7.64 7.64 7.64 7.64 14.29 ...
## $ Longitude : num [1:1606] 125 125 125 125 121 ...
## $ As_Of_Date : Date[1:1606], format: "2024-06-01" "2024-11-01" ...
membership_trend <- current_data |>
group_by(As_of_month) |>
summarise(New_Members_Total = sum(ifelse("New_Members" %in% names(current_data), New_Members, Total_member), na.rm = TRUE))
DT::datatable(membership_trend)
ggplot(membership_trend, aes(x = As_of_month, y = New_Members_Total)) +
geom_line(color = "blue", size = 1.2) +
geom_point(size = 2) +
labs(title = "New Members per Month (2024)", x = "Month", y = "New Members") +
theme_minimal()
max_month <- membership_trend |> filter(New_Members_Total == max(New_Members_Total))
min_month <- membership_trend |> filter(New_Members_Total == min(New_Members_Total))
max_month
## # A tibble: 1 × 2
## As_of_month New_Members_Total
## <chr> <int>
## 1 Feb 1220
min_month
## # A tibble: 1 × 2
## As_of_month New_Members_Total
## <chr> <int>
## 1 Oct 776
#Which branches had the highest number of new members in 2024?
growth_stats <- current_data |>
group_by(Branch_name) |>
summarise(Members_Added = sum(ifelse("New_Members" %in% names(current_data), New_Members, Total_member), na.rm = TRUE)) |>
arrange(desc(Members_Added)) |>
slice_head(n = 10)
DT::datatable(growth_stats)
ggplot(growth_stats, aes(x = reorder(Branch_name, Members_Added), y = Members_Added)) +
geom_col(fill = "skyblue") +
coord_flip() +
labs(title = "Top 10 Branches by New Members", x = "Branch", y = "New Members") +
theme_minimal()
#Portfolio at Risk (PAR) Ratio by Branch
risk_levels <- current_data |>
group_by(Branch_name) |>
summarise(
Loan_Total = sum(Loan_amount, na.rm = TRUE),
PAR_Total = sum(Portfolio_at_risk, na.rm = TRUE),
Risk_Rate = ifelse(Loan_Total > 0, PAR_Total / Loan_Total, NA)
) |>
arrange(desc(Risk_Rate)) |>
slice_head(n = 10)
DT::datatable(risk_levels)
ggplot(risk_levels, aes(x = reorder(Branch_name, Risk_Rate), y = Risk_Rate)) +
geom_col(fill = "skyblue") +
coord_flip() +
labs(title = "PAR Ratio by Branch (Top 10)", x = "Branch", y = "PAR Ratio") +
theme_minimal() +
scale_y_continuous(labels = percent)
#What is the borrower-to-member ratio for each branch?
borrower_ratio <- current_data |>
group_by(Branch_name) |>
summarise(
Total_Members = sum(Total_member, na.rm = TRUE),
Active_Borrowers = sum(Total_borrower, na.rm = TRUE),
Borrower_Rate = ifelse(Total_Members > 0, Active_Borrowers / Total_Members * 100, NA)
) |>
arrange(desc(Borrower_Rate)) |>
slice_head(n = 10)
DT::datatable(borrower_ratio)
ggplot(borrower_ratio, aes(x = reorder(Branch_name, Borrower_Rate), y = Borrower_Rate)) +
geom_col(fill = "pink") +
coord_flip() +
labs(title = "Top 10 Branches by Borrower Percentage", x = "Branch", y = "Borrower %") +
theme_minimal()
#Which region registered the most new members in 2024?
region_members <- current_data |>
group_by(region) |>
summarise(New_Members_Total = sum(ifelse("New_Members" %in% names(current_data), New_Members, Total_member), na.rm = TRUE)) |>
arrange(desc(New_Members_Total))
DT::datatable(region_members)
ggplot(region_members, aes(x = reorder(region, New_Members_Total), y = New_Members_Total)) +
geom_col(fill = "steelblue") +
coord_flip() +
labs(title = "New Members by Region (2024)", x = "Region", y = "New Members") +
theme_minimal()
# Which branches have the highest number of active borrowers in
2024?
top_borrowers <- current_data |>
group_by(Branch_name) |>
summarise(Active_Borrowers = sum(Total_borrower, na.rm = TRUE)) |>
arrange(desc(Active_Borrowers)) |>
slice_head(n = 10)
DT::datatable(top_borrowers)
ggplot(top_borrowers, aes(x = reorder(Branch_name, Active_Borrowers), y = Active_Borrowers)) +
geom_col(fill = "skyblue") +
coord_flip() +
labs(title = "Top 10 Branches by Active Borrowers (2024)", x = "Branch", y = "Active Borrowers") +
theme_minimal()
#Total Loans Disbursed by Branch (2024)
``` r
branch_loans <- current_data |>
group_by(Branch_name) |>
summarise(Loan_Issued = sum(Loan_amount, na.rm = TRUE)) |>
arrange(desc(Loan_Issued)) |>
slice_head(n = 10)
ggplot(branch_loans, aes(x = reorder(Branch_name, Loan_Issued), y = Loan_Issued)) +
geom_col(fill = "skyblue") +
coord_flip() +
labs(title = "Top 10 Branches by Loan Disbursement", x = "Branch", y = "Total Loan") +
theme_minimal() +
scale_y_continuous(labels = scales::dollar)