df_loan <- read.csv('https://storage.googleapis.com/dqlab-dataset/loan_disbursement.csv', stringsAsFactors = F)
dplyr::glimpse(df_loan)
## Rows: 9,754
## Columns: 5
## $ loan_id <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17…
## $ tanggal_cair <chr> "2020-01-06", "2020-01-06", "2020-01-06", "2020-01-06", "…
## $ cabang <chr> "AA", "AA", "AA", "AA", "AA", "AA", "AB", "AB", "AB", "AB…
## $ agen <chr> "AA-1", "AA-1", "AA-1", "AA-2", "AA-2", "AA-2", "AB-1", "…
## $ amount <int> 320000, 440000, 200000, 430000, 360000, 220000, 320000, 4…
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
df_loan_mei <- df_loan %>%
filter(tanggal_cair >= '2020-05-01', tanggal_cair <= '2020-05-3') %>%
group_by(cabang) %>%
summarise(total_amount = sum(amount))
df_loan_mei
## # A tibble: 22 × 2
## cabang total_amount
## <chr> <int>
## 1 AA 75710000
## 2 AB 81440000
## 3 AC 83990000
## 4 AD 76080000
## 5 AE 54200000
## 6 AF 68040000
## 7 AG 74080000
## 8 AH 73840000
## 9 AI 46640000
## 10 AJ 43580000
## # … with 12 more rows
library(dplyr)
library(scales)
## Warning: package 'scales' was built under R version 4.2.2
df_loan_mei %>%
arrange(desc(total_amount)) %>%
mutate(total_amount = comma(total_amount)) %>%
head(5)
## # A tibble: 5 × 2
## cabang total_amount
## <chr> <chr>
## 1 AC 83,990,000
## 2 AB 81,440,000
## 3 AD 76,080,000
## 4 AA 75,710,000
## 5 AG 74,080,000
library(dplyr)
library(scales)
df_loan_mei %>%
arrange(total_amount) %>%
mutate(total_amount = comma(total_amount)) %>%
head(5)
## # A tibble: 5 × 2
## cabang total_amount
## <chr> <chr>
## 1 AV 30,280,000
## 2 AS 31,740,000
## 3 AT 34,840,000
## 4 AU 35,610,000
## 5 AO 39,120,000
library(dplyr)
df_cabang_umur <- df_loan %>%
group_by(cabang) %>%
summarise(pertama_cair = min(tanggal_cair)) %>%
mutate(umur = as.numeric(as.Date('2020-05-15') - as.Date( pertama_cair))%/%30)
df_cabang_umur
## # A tibble: 22 × 3
## cabang pertama_cair umur
## <chr> <chr> <dbl>
## 1 AA 2020-01-06 4
## 2 AB 2020-01-06 4
## 3 AC 2020-01-06 4
## 4 AD 2020-01-06 4
## 5 AE 2020-02-03 3
## 6 AF 2020-02-03 3
## 7 AG 2020-02-03 3
## 8 AH 2020-02-03 3
## 9 AI 2020-03-02 2
## 10 AJ 2020-03-02 2
## # … with 12 more rows
library(dplyr)
df_loan_mei_umur <- df_cabang_umur %>%
inner_join(df_loan_mei, by = 'cabang')
df_loan_mei_umur
## # A tibble: 22 × 4
## cabang pertama_cair umur total_amount
## <chr> <chr> <dbl> <int>
## 1 AA 2020-01-06 4 75710000
## 2 AB 2020-01-06 4 81440000
## 3 AC 2020-01-06 4 83990000
## 4 AD 2020-01-06 4 76080000
## 5 AE 2020-02-03 3 54200000
## 6 AF 2020-02-03 3 68040000
## 7 AG 2020-02-03 3 74080000
## 8 AH 2020-02-03 3 73840000
## 9 AI 2020-03-02 2 46640000
## 10 AJ 2020-03-02 2 43580000
## # … with 12 more rows
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.2
ggplot(df_loan_mei_umur, aes(x = umur, y = total_amount)) +
geom_point() +
scale_y_continuous(labels = scales::comma) +
labs(title = "Semakin berumur, perfoma cabang akan semakin baik",
x = "Umur (bulan)",
y = "Total Amount")
library(dplyr)
library(scales)
df_loan_mei_flag <- df_loan_mei_umur %>%
group_by(umur) %>%
mutate(Q1 = quantile(total_amount, 0.25),
Q3 = quantile(total_amount, 0.75),
IQR = (Q3-Q1)) %>%
mutate(flag = ifelse(total_amount < (Q1 - IQR), 'rendah','baik'))
df_loan_mei_flag %>%
filter(flag == 'rendah') %>%
mutate_if(is.numeric, funs(comma))
## `mutate_if()` ignored the following grouping variables:
## • Column `umur`
## Warning: `funs()` was deprecated in dplyr 0.8.0.
## ℹ Please use a list of either functions or lambdas:
##
## # Simple named list: list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`: tibble::lst(mean, median)
##
## # Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## # A tibble: 2 × 8
## # Groups: umur [2]
## cabang pertama_cair umur total_amount Q1 Q3 IQR flag
## <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 AE 2020-02-03 3 54,200,000 64,580,000 73,900,000 9,320,000 rendah
## 2 AL 2020-03-02 2 40,650,000 43,580,000 44,590,000 1,010,000 rendah
library(ggplot2)
ggplot(df_loan_mei_flag, aes(x = umur, y = total_amount)) +
geom_point(aes(color = flag)) +
scale_color_manual(breaks = c("baik", "rendah"),
values=c("blue", "red")) +
scale_y_continuous(labels = scales::comma) +
labs(title = "Ada cabang berpeforma rendah padahal tidak termasuk bottom 5 nasional",
color = "",
x = "Umur (bulan)",
y = "Total Amount")
library(dplyr)
library(scales)
df_loan_mei_flag %>%
filter(umur == 3) %>%
inner_join(df_loan, by = 'cabang') %>%
filter(tanggal_cair >= '2020-05-01', tanggal_cair <= '2020-05-31') %>%
group_by(cabang, flag) %>%
summarise(jumlah_hari = n_distinct(tanggal_cair),
agen_aktif = n_distinct(agen),
total_loan_cair = n_distinct(loan_id),
avg_amount = mean(amount),
total_amount = sum(amount)) %>%
arrange(total_amount) %>%
mutate_if(is.numeric, funs(comma))
## `summarise()` has grouped output by 'cabang'. You can override using the
## `.groups` argument.
## `mutate_if()` ignored the following grouping variables:
## # A tibble: 4 × 7
## # Groups: cabang [4]
## cabang flag jumlah_hari agen_aktif total_loan_cair avg_amount total_amount
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 AE rendah 21 3 175 309,714 54,200,000
## 2 AF baik 21 3 225 302,400 68,040,000
## 3 AH baik 21 3 241 306,390 73,840,000
## 4 AG baik 21 3 241 307,386 74,080,000
library(dplyr)
library(scales)
df_loan_mei_flag %>%
filter(umur == 3, flag == 'rendah') %>%
inner_join(df_loan, by = 'cabang') %>%
filter(tanggal_cair >= '2020-05-01', tanggal_cair <= '2020-05-31') %>%
group_by(cabang, agen) %>%
summarise(jumlah_hari = n_distinct(tanggal_cair),
total_loan_cair = n_distinct(loan_id),
avg_amount = mean(amount),
total_amount = sum(amount)) %>%
arrange(total_amount) %>%
mutate_if(is.numeric, funs(comma))
## `summarise()` has grouped output by 'cabang'. You can override using the
## `.groups` argument.
## `mutate_if()` ignored the following grouping variables:
## # A tibble: 3 × 6
## # Groups: cabang [1]
## cabang agen jumlah_hari total_loan_cair avg_amount total_amount
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 AE AE-3 4 16 310,625 4,970,000
## 2 AE AE-2 18 73 320,274 23,380,000
## 3 AE AE-1 21 86 300,581 25,850,000
library(dplyr)
library(scales)
df_loan %>%
filter(cabang == 'AH') %>%
filter(tanggal_cair >= '2020-05-01', tanggal_cair <= '2020-05-31') %>%
group_by(cabang, agen) %>%
summarise(jumlah_hari = n_distinct(tanggal_cair),
total_loan_cair = n_distinct(loan_id),
avg_amount = mean(amount),
total_amount = sum(amount)) %>%
arrange(total_amount) %>%
mutate_if(is.numeric, funs(comma))
## `summarise()` has grouped output by 'cabang'. You can override using the
## `.groups` argument.
## `mutate_if()` ignored the following grouping variables:
## # A tibble: 3 × 6
## # Groups: cabang [1]
## cabang agen jumlah_hari total_loan_cair avg_amount total_amount
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 AH AH-3 19 74 303,649 22,470,000
## 2 AH AH-1 21 81 301,358 24,410,000
## 3 AH AH-2 21 86 313,488 26,960,000
Based on the analysis, it can be concluded that the low performance of the AE branch is caused by one of the agents disbursing only 4 days in 1 month, even though other agents can be active for 21 days. This makes the total number of agents only 20% compared to other agents. While in the AH branch, the performance is very good because the three agents disburse almost / always every working day. 2 full people 21 days 1 person 19 days. So that the performance is well maintained. It is also worth noting that in order to compare the performance of branches, it must first be grouped according to the same characteristics. All indirect branches compared without knowing its characteristics.