📌 Data Preparation & Sampling

# Load the dataset
data <- readRDS("dataset.rds")

# Apply stratified sampling by region (20%)
set.seed(123)
sampled_data <- data |>
  group_by(region) |>
  sample_frac(0.2) |>
  ungroup()

# Focus on 2024 data only
current_data <- sampled_data |>
  filter(As_of_year == 2024)

# Check structure
str(current_data)
## tibble [1,606 × 43] (S3: tbl_df/tbl/data.frame)
##  $ Identifier                  : int [1:1606] 8569 11937 10580 2313 9789 11144 939 2851 2852 11821 ...
##  $ Branch_code                 : int [1:1606] 408 408 408 408 117 118 120 117 118 509 ...
##  $ Branch_name                 : chr [1:1606] "Wao" "Wao" "Wao" " Wao" ...
##  $ Department                  : chr [1:1606] "Operations" "Operations" "Operations" "Operations" ...
##  $ No_of_credit_officer        : int [1:1606] 3 3 3 3 3 3 4 3 3 3 ...
##  $ Total_member                : int [1:1606] 1116 1093 1111 1094 1167 776 1220 1078 781 979 ...
##  $ Deposit_balance             : num [1:1606] 2509169 2421737 2460582 2318548 5558780 ...
##  $ Total_borrower              : int [1:1606] 824 814 841 805 1002 683 1136 954 693 887 ...
##  $ Loan_amount                 : num [1:1606] 17433000 16657000 17302000 17326000 22410000 ...
##  $ PAR_1_30_amount             : num [1:1606] 62523 29296 107746 313516 15785 ...
##  $ PAR_1_30_borrower           : int [1:1606] 8 4 9 19 4 3 1 2 0 5 ...
##  $ PAR_31_60_amount            : num [1:1606] 142991 81428 9953 62250 21525 ...
##  $ PAR_31_60_borrower          : int [1:1606] 9 5 2 9 3 1 1 0 2 5 ...
##  $ PAR_61_90_amount            : num [1:1606] 79842 35480 65434 47078 19006 ...
##  $ PAR_61_90_borrower          : int [1:1606] 8 4 6 7 1 4 1 2 0 3 ...
##  $ PAR_91_180_amount           : num [1:1606] 103840 124699 185272 55671 27528 ...
##  $ PAR_91_180_borrower         : int [1:1606] 13 14 15 8 2 1 3 3 0 5 ...
##  $ PAR_181_365_amount          : num [1:1606] 214497 231763 219936 246751 13914 ...
##  $ PAR_181_365_borrower        : int [1:1606] 30 25 24 32 5 2 2 10 5 4 ...
##  $ PAR_above_365_amount        : num [1:1606] 1862816 1995157 1914082 1845631 930160 ...
##  $ PAR_above_365_borrower      : int [1:1606] 289 305 301 284 181 40 57 180 37 143 ...
##  $ Portfolio_at_risk           : num [1:1606] 2466509 2497822 2502422 2570897 1027918 ...
##  $ Past_due_ratio              : num [1:1606] 0.29 0.32 0.29 0.26 0.09 0.03 0.02 0.09 0.03 0.07 ...
##  $ Portfolio_at_risk_par_or_olb: num [1:1606] 0.31 0.33 0.31 0.29 0.09 ...
##  $ Monthly_income              : num [1:1606] 271218 265508 304811 306319 509281 ...
##  $ As_of_month                 : chr [1:1606] "Jun" "Nov" "Sep" "Apr" ...
##  $ As_of_year                  : chr [1:1606] "2024" "2024" "2024" "2024" ...
##  $ CreatedAt                   : POSIXct[1:1606], format: "2024-07-11 12:23:00" "2024-12-10 17:42:00" ...
##  $ UpdatedAt                   : POSIXct[1:1606], format: "2024-07-11 12:23:00" "2024-12-10 17:42:00" ...
##  $ Division                    : chr [1:1606] "Division 5" "Division 5" "Division 5" "Division 5" ...
##  $ District                    : chr [1:1606] "District 13" "District 13" "District 13" "District 13" ...
##  $ Area                        : chr [1:1606] "Area 067" "Area 067" "Area 067" "Area 067" ...
##  $ Branch_opening_date         : Date[1:1606], format: "2018-04-01" "2018-04-01" ...
##  $ status                      : chr [1:1606] "TRUE" "TRUE" "TRUE" "TRUE" ...
##  $ street                      : chr [1:1606] NA NA NA NA ...
##  $ barangay                    : chr [1:1606] "Extension" "Extension" "Extension" "Extension" ...
##  $ City_municipality           : chr [1:1606] "Wao" "Wao" "Wao" "Wao" ...
##  $ province                    : chr [1:1606] "Lanao Del Sur" "Lanao Del Sur" "Lanao Del Sur" "Lanao Del Sur" ...
##  $ region                      : chr [1:1606] "BARMM" "BARMM" "BARMM" "BARMM" ...
##  $ postal_code                 : chr [1:1606] "8703" "8703" "8703" "8703" ...
##  $ Latitude                    : num [1:1606] 7.64 7.64 7.64 7.64 14.29 ...
##  $ Longitude                   : num [1:1606] 125 125 125 125 121 ...
##  $ As_Of_Date                  : Date[1:1606], format: "2024-06-01" "2024-11-01" ...

What is the monthly trend of new member registrations in 2024?

membership_trend <- current_data |>
  group_by(As_of_month) |>
  summarise(New_Members_Total = sum(ifelse("New_Members" %in% names(current_data), New_Members, Total_member), na.rm = TRUE))

DT::datatable(membership_trend)
ggplot(membership_trend, aes(x = As_of_month, y = New_Members_Total)) +
  geom_line(color = "blue", size = 1.2) +
  geom_point(size = 2) +
  labs(title = "New Members per Month (2024)", x = "Month", y = "New Members") +
  theme_minimal()

Which month had the highest and lowest member registration?

max_month <- membership_trend |> filter(New_Members_Total == max(New_Members_Total))
min_month <- membership_trend |> filter(New_Members_Total == min(New_Members_Total))

max_month
## # A tibble: 1 × 2
##   As_of_month New_Members_Total
##   <chr>                   <int>
## 1 Feb                      1220
min_month
## # A tibble: 1 × 2
##   As_of_month New_Members_Total
##   <chr>                   <int>
## 1 Oct                       776

#Which branches had the highest number of new members in 2024?

growth_stats <- current_data |>
  group_by(Branch_name) |>
  summarise(Members_Added = sum(ifelse("New_Members" %in% names(current_data), New_Members, Total_member), na.rm = TRUE)) |>
  arrange(desc(Members_Added)) |>
  slice_head(n = 10)

DT::datatable(growth_stats)
ggplot(growth_stats, aes(x = reorder(Branch_name, Members_Added), y = Members_Added)) +
  geom_col(fill = "skyblue") +
  coord_flip() +
  labs(title = "Top 10 Branches by New Members", x = "Branch", y = "New Members") +
  theme_minimal()

#Portfolio at Risk (PAR) Ratio by Branch

risk_levels <- current_data |>
  group_by(Branch_name) |>
  summarise(
    Loan_Total = sum(Loan_amount, na.rm = TRUE),
    PAR_Total = sum(Portfolio_at_risk, na.rm = TRUE),
    Risk_Rate = ifelse(Loan_Total > 0, PAR_Total / Loan_Total, NA)
  ) |>
  arrange(desc(Risk_Rate)) |>
  slice_head(n = 10)

DT::datatable(risk_levels)
ggplot(risk_levels, aes(x = reorder(Branch_name, Risk_Rate), y = Risk_Rate)) +
  geom_col(fill = "skyblue") +
  coord_flip() +
  labs(title = "PAR Ratio by Branch (Top 10)", x = "Branch", y = "PAR Ratio") +
  theme_minimal() +
  scale_y_continuous(labels = percent)

#What is the borrower-to-member ratio for each branch?

borrower_ratio <- current_data |>
  group_by(Branch_name) |>
  summarise(
    Total_Members = sum(Total_member, na.rm = TRUE),
    Active_Borrowers = sum(Total_borrower, na.rm = TRUE),
    Borrower_Rate = ifelse(Total_Members > 0, Active_Borrowers / Total_Members * 100, NA)
  ) |>
  arrange(desc(Borrower_Rate)) |>
  slice_head(n = 10)

DT::datatable(borrower_ratio)
ggplot(borrower_ratio, aes(x = reorder(Branch_name, Borrower_Rate), y = Borrower_Rate)) +
  geom_col(fill = "pink") +
  coord_flip() +
  labs(title = "Top 10 Branches by Borrower Percentage", x = "Branch", y = "Borrower %") +
  theme_minimal()

#Which region registered the most new members in 2024?

region_members <- current_data |>
  group_by(region) |>
  summarise(New_Members_Total = sum(ifelse("New_Members" %in% names(current_data), New_Members, Total_member), na.rm = TRUE)) |>
  arrange(desc(New_Members_Total))

DT::datatable(region_members)
ggplot(region_members, aes(x = reorder(region, New_Members_Total), y = New_Members_Total)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(title = "New Members by Region (2024)", x = "Region", y = "New Members") +
  theme_minimal()

# Which branches have the highest number of active borrowers in 2024?

top_borrowers <- current_data |>
  group_by(Branch_name) |>
  summarise(Active_Borrowers = sum(Total_borrower, na.rm = TRUE)) |>
  arrange(desc(Active_Borrowers)) |>
  slice_head(n = 10)

DT::datatable(top_borrowers)
ggplot(top_borrowers, aes(x = reorder(Branch_name, Active_Borrowers), y = Active_Borrowers)) +
  geom_col(fill = "skyblue") +
  coord_flip() +
  labs(title = "Top 10 Branches by Active Borrowers (2024)", x = "Branch", y = "Active Borrowers") +
  theme_minimal()


#Total Loans Disbursed by Branch (2024)


``` r
branch_loans <- current_data |>
  group_by(Branch_name) |>
  summarise(Loan_Issued = sum(Loan_amount, na.rm = TRUE)) |>
  arrange(desc(Loan_Issued)) |>
  slice_head(n = 10)
  
ggplot(branch_loans, aes(x = reorder(Branch_name, Loan_Issued), y = Loan_Issued)) +
  geom_col(fill = "skyblue") +
  coord_flip() +
  labs(title = "Top 10 Branches by Loan Disbursement", x = "Branch", y = "Total Loan") +
  theme_minimal() +
  scale_y_continuous(labels = scales::dollar)