KOMPUTASI

~ Ujian Tengah Semester ~


Kontak : \(\downarrow\)
Email
Instagram https://www.instagram.com/m_naufalardiansyah/
RPubs https://rpubs.com/muhammad_naufal/

Data Set

Kumpulan data yang akan Anda gunakan dalam ujian tengah semester ini adalah data konsumen yang melakukan pinjaman di suatu bank. Dataset ini memiliki 614 observasi dan 13 atribut sebagai berikut:

Tugas 1

Lakukan proses persiapan data dengan R dan Python, dengan beberapa langkah berikut:

  • Import Data
# Load the loan data set. Empty strings and the literal "NA" are both read
# as missing values.
dataloan <- read.csv(
  "C:/logaritma/loan_train.csv",
  header = TRUE,
  na.strings = c("", "NA")
)
dataloan
  • Penanganan Data Hilang
colSums(is.na(dataloan))
##           Loan_ID            Gender           Married        Dependents 
##                 0                13                 3                15 
##         Education     Self_Employed   ApplicantIncome CoapplicantIncome 
##                 0                32                 0                 0 
##        LoanAmount  Loan_Amount_Term    Credit_History     Property_Area 
##                22                14                50                 0 
##       Loan_Status 
##                 0
dataloan.clean <- na.omit(dataloan)
apply(is.na(dataloan),2, which) 
## $Loan_ID
## integer(0)
## 
## $Gender
##  [1]  24 127 172 189 315 335 461 468 478 508 577 589 593
## 
## $Married
## [1] 105 229 436
## 
## $Dependents
##  [1] 103 105 121 227 229 294 302 333 336 347 356 436 518 572 598
## 
## $Education
## integer(0)
## 
## $Self_Employed
##  [1]  12  20  25  30  31  96 108 112 115 159 171 219 232 237 269 296 334 337 345
## [20] 375 381 386 412 433 448 464 469 536 543 580 601 602
## 
## $ApplicantIncome
## integer(0)
## 
## $CoapplicantIncome
## integer(0)
## 
## $LoanAmount
##  [1]   1  36  64  82  96 103 104 114 128 203 285 306 323 339 388 436 438 480 525
## [20] 551 552 606
## 
## $Loan_Amount_Term
##  [1]  20  37  45  46  74 113 166 198 224 233 336 368 422 424
## 
## $Credit_History
##  [1]  17  25  31  43  80  84  87  96 118 126 130 131 157 182 188 199 220 237 238
## [20] 260 261 280 310 314 318 319 324 349 364 378 393 396 412 445 450 452 461 474
## [39] 491 492 498 504 507 531 534 545 557 566 584 601
## 
## $Property_Area
## integer(0)
## 
## $Loan_Status
## integer(0)
# Overwrite every missing cell of the data frame with 0, then display it.
# NOTE(review): the mask covers all columns, so the character columns
# receive 0 as well - confirm this is intended for the categorical fields.
dataloan <- replace(dataloan, is.na(dataloan), 0)
dataloan
  • Periksa Data Duplikat
# length() of a data frame is its number of columns, so compare row counts:
# TRUE means that no row of dataloan.clean is repeated.
nrow(unique(dataloan.clean)) == nrow(dataloan.clean)
## [1] TRUE
  • Pemisahan Data Kategori dan Numerik
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.1
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.0      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2
## Warning: package 'tibble' was built under R version 4.2.1
## Warning: package 'dplyr' was built under R version 4.2.1
## Warning: package 'stringr' was built under R version 4.2.1
## Warning: package 'forcats' was built under R version 4.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
# Split the cleaned data by column type: character columns go to
# dataloan_kat, numeric columns go to dataloan_num.
dataloan_kat <- dataloan.clean %>%
  select(where(is.character))
dataloan_kat
library(tidyverse)
dataloan_num <- dataloan.clean %>%
  select(where(is.numeric))
dataloan_num
  • Penanganan Data Numerik
standarisasi <- as.data.frame(lapply(dataloan_num,scale))
standarisasi
# Min-max scaling: subtract the minimum of x and divide by its range
# (max - min).
normalisasi <- function(x) {
  lowest <- min(x)
  span <- max(x) - lowest
  (x - lowest) / span
}

normalisasi1 <- as.data.frame(lapply(dataloan_num,normalisasi))
normalisasi1
# Robust scaling: centre x on its median and divide by the interquartile
# range (Q3 - Q1).
#
# x: numeric vector.
# Returns a numeric vector the same length as x.
robust <- function(x) {
  # The whole IQR must be the divisor; without the grouping, the division
  # would use Q3 alone and Q1 would be subtracted afterwards.
  (x - median(x)) / IQR(x)
}

robust_skala <- as.data.frame(lapply(dataloan_num,robust))
robust_skala
  • Penanganan Data Pencilan
library(leaps)
## Warning: package 'leaps' was built under R version 4.2.1
# Flag outliers with the 1.5 * IQR rule.
#
# x: numeric vector.
# Returns a logical vector, TRUE where a value lies above Q3 + 1.5 * IQR or
# below Q1 - 1.5 * IQR.
outliers <- function(x) {
  Q1 <- quantile(x, probs = .25)
  Q3 <- quantile(x, probs = .75)
  iqr <- IQR(x)

  upper_limit <- Q3 + (1.5 * iqr)
  lower_limit <- Q1 - (1.5 * iqr)

  # Return the mask explicitly: a body ending on an assignment would only
  # hand back the lower limit, invisibly.
  x > upper_limit | x < lower_limit
}
dpen <- dataloan
# Mark the values of x that fall outside the 1.5 * IQR fences.
# Returns a logical vector: TRUE for values above Q3 + 1.5 * IQR or below
# Q1 - 1.5 * IQR.
outliers <- function(x) {
  quartiles <- quantile(x, probs = c(.25, .75))
  spread <- quartiles[2] - quartiles[1]

  fence_high <- quartiles[2] + (spread * 1.5)
  fence_low <- quartiles[1] - (spread * 1.5)

  too_high <- x > fence_high
  too_low <- x < fence_low
  too_high | too_low
}


outlier1 <- subset(dpen, outliers (dpen$LoanAmount))
outlier2 <- subset(dpen, outliers (dpen$ApplicantIncome)) 
outlier3 <- subset(dpen, outliers (dpen$CoapplicantIncome))

dpenlier <- rbind(outlier1, outlier2, outlier3) %>% distinct()

dpenlier
  • Penanganan Data Kategorikal
dataloan_kat %>% summarise_all(n_distinct)
GenderLabel <-factor(dataloan_kat$Gender, labels=c(0, 1))
MarriedLabel <-factor(dataloan_kat$Married, labels=c(0, 1))
DependentsLabel <-factor (dataloan_kat$Dependents, labels=c(0, 1, 2, 3))
EducationLabel <-factor(dataloan_kat$Education, labels=c(0, 1))
Self_EmployedLabel <-factor(dataloan_kat$Self_Employed, labels=c(0, 1))
Property_AreaLabel <- factor (dataloan_kat$Property_Area, labels=c(0, 1, 2))
Loan_StatusLabel <-factor (dataloan_kat$Loan_Status, labels=c(0, 1))

dataloan_kat_labeled <- data.frame("ID" = dataloan_kat$Loan_ID, GenderLabel, MarriedLabel, DependentsLabel, EducationLabel, Self_EmployedLabel, Property_AreaLabel, Loan_StatusLabel)

dataloan_kat_labeled

Tugas 2

Lakukan Proses Visualisasi Data dengan menggunakan R dan Python dengan beberapa langkah berikut:

  • Visualisasi Univariabel

Kategorik

library(ggplot2)

# Read the raw file once; every bar chart below uses this same data frame.
Loan_Train <- read.csv("C:/logaritma/loan_train.csv")

# Draw a frequency bar chart for one column of a data frame.
#
# data:    data frame holding the column.
# column:  name of the column to count, as a string.
# x_label: text for the x axis.
# title:   plot title.
# Returns the ggplot object; called at top level it is printed automatically.
plot_frequency <- function(data, column, x_label, title) {
  ggplot(data, aes(x = .data[[column]])) +
    geom_bar(fill = "cornflowerblue",
             color = "azure4") +
    theme_minimal() +
    labs(x = x_label,
         y = "Frequency",
         title = title)
}

plot_frequency(Loan_Train, "Gender", "gender", "Loan by Gender")

plot_frequency(Loan_Train, "Married", "Married", "Loan by Married")

plot_frequency(Loan_Train, "Dependents", "Dependents", "Loan by Dependents")

plot_frequency(Loan_Train, "Education", "Education", "Loan by Education")

plot_frequency(Loan_Train, "Property_Area", "Property_Area",
               "Loan by Property_Area")

plot_frequency(Loan_Train, "Loan_Status", "Loan_Status", "Loan by Loan Status")

plot_frequency(Loan_Train, "Credit_History", "Credit_History", "Credit History")
## Warning: Removed 50 rows containing non-finite values (stat_count).

Numerik

library(ggplot2)                                     
library(scales)                                     
## Warning: package 'scales' was built under R version 4.2.1
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
ggplot(Loan_Train, 
       aes(x = LoanAmount, 
           y= ..count.. / sum(..count..))) +
  geom_histogram(fill = "cornflowerblue", 
                 color = "white", 
                 binwidth = 5) +
  theme_minimal() +                                  
  labs(title="LoanAmount", 
       y = "Percent",
       x = "LoanAmount") +
  scale_y_continuous(labels = percent)
## Warning: Removed 22 rows containing non-finite values (stat_bin).

library(ggplot2)                                     
library(scales)                                      

ggplot(Loan_Train, 
       aes(x = Loan_Amount_Term, 
           y= ..count.. / sum(..count..))) +
  geom_histogram(fill = "cornflowerblue", 
                 color = "white", 
                 binwidth = 5) +
  theme_minimal() +                                  
  labs(title="Loan Amount Term", 
       y = "Percent",
       x = "Loan_Amount_Term") +
  scale_y_continuous(labels = percent)
## Warning: Removed 14 rows containing non-finite values (stat_bin).

library(ggplot2)
library(scales)

# Draw a histogram of one numeric column with the y axis shown as the share
# of observations per bin.
#
# data:     data frame holding the column.
# column:   name of the numeric column, as a string.
# binwidth: width of each bin, in the units of the column.
# title:    plot title.
# Returns the ggplot object; called at top level it is printed automatically.
plot_percent_histogram <- function(data, column, binwidth, title) {
  ggplot(data,
         aes(x = .data[[column]],
             y = after_stat(count / sum(count)))) +
    geom_histogram(fill = "cornflowerblue",
                   color = "white",
                   binwidth = binwidth) +
    theme_minimal() +
    labs(title = title,
         y = "Percent",
         x = column) +
    scale_y_continuous(labels = percent)
}

# Incomes reach into the tens of thousands, so a bin width of 5 would create
# thousands of near-empty bins; use bins of 1000 instead.
plot_percent_histogram(Loan_Train, "ApplicantIncome", 1000, "ApplicantIncome")

plot_percent_histogram(Loan_Train, "CoapplicantIncome", 1000,
                       "CoapplicantIncome")

# Credit_History only takes the values 0 and 1, so one bin per value.
plot_percent_histogram(Loan_Train, "Credit_History", 1, "Credit_History")
## Warning: Removed 50 rows containing non-finite values (stat_bin).

  • Visualisasi Bivariabel

Categorical VS Categorical

library(ggplot2)

# Draw a dodged bar chart counting one categorical column, split by a second.
#
# data:     data frame holding both columns.
# x_var:    name of the column placed on the x axis, as a string.
# fill_var: name of the column used for the bar fill, as a string.
# Returns the ggplot object; called at top level it is printed automatically.
plot_category_pair <- function(data, x_var, fill_var) {
  ggplot(data, aes(x = .data[[x_var]], fill = .data[[fill_var]])) +
    theme_minimal() +
    geom_bar(position = position_dodge(preserve = "single")) +
    # Name the axis and legend after the columns rather than the expressions.
    labs(x = x_var, fill = fill_var)
}

plot_category_pair(Loan_Train, "Gender", "Education")

plot_category_pair(Loan_Train, "Gender", "Self_Employed")

plot_category_pair(Loan_Train, "Gender", "Property_Area")

plot_category_pair(Loan_Train, "Gender", "Loan_Status")

plot_category_pair(Loan_Train, "Married", "Education")

plot_category_pair(Loan_Train, "Married", "Self_Employed")

plot_category_pair(Loan_Train, "Married", "Loan_Status")

plot_category_pair(Loan_Train, "Education", "Property_Area")

plot_category_pair(Loan_Train, "Education", "Gender")

# NOTE(review): this repeats the first Gender / Education plot - confirm
# whether a different pair was intended.
plot_category_pair(Loan_Train, "Gender", "Education")

plot_category_pair(Loan_Train, "Gender", "Married")

Continuous VS Continuous

library(ggplot2)
library(scales)

# Draw a scatter plot of two numeric columns.
#
# data:     data frame holding both columns.
# x_var:    name of the column on the x axis, as a string.
# y_var:    name of the column on the y axis, as a string.
# y_labels: label formatter for the y axis (default: ggplot2's own labels).
# Returns the ggplot object; called at top level it is printed automatically.
#
# No axis limits are set: fixed limits of 0-60 on x and 0-10000 on y would
# discard almost every observation of these variables.
plot_scatter <- function(data, x_var, y_var, y_labels = waiver()) {
  ggplot(data, aes(x = .data[[x_var]], y = .data[[y_var]])) +
    geom_point(color = "cornflowerblue",
               size = 2,
               alpha = .8) +
    scale_y_continuous(labels = y_labels) +
    theme_minimal() +
    labs(x = x_var,
         y = y_var,
         title = "",
         subtitle = "")
}

# enhanced scatter plot
plot_scatter(Loan_Train, "LoanAmount", "ApplicantIncome", scales::dollar)
## Warning: Removed 22 rows containing missing values (geom_point).

# enhanced scatter plot
plot_scatter(Loan_Train, "LoanAmount", "CoapplicantIncome", scales::dollar)
## Warning: Removed 22 rows containing missing values (geom_point).

# enhanced scatter plot
plot_scatter(Loan_Train, "ApplicantIncome", "CoapplicantIncome", scales::dollar)

# enhanced scatter plot
# Loan_Amount_Term is not a money amount, so it keeps the default labels.
plot_scatter(Loan_Train, "LoanAmount", "Loan_Amount_Term")
## Warning: Removed 36 rows containing missing values (geom_point).

  • Visualisasi Multivariabel
library(ggplot2)                                    
ggplot(Loan_Train, 
       aes(x = LoanAmount, 
           y = ApplicantIncome, 
           color = Education, 
           shape = Gender)) +
  geom_point(size = 3, alpha = .6) +
  theme_minimal() +
  labs(title = "")
## Warning: Removed 22 rows containing missing values (geom_point).

Tugas 3

Lakukan proses analisa data secara deskriptif menggunakan R dan Python dengan beberapa langkah berikut:

  • Kualitatif
    • Kategori Univariat
library(readr)                                     
df.loan= read_csv("C:/logaritma/loan_train.csv")  
## Rows: 614 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): Loan_ID, Gender, Married, Dependents, Education, Self_Employed, Pro...
## dbl (5): ApplicantIncome, CoapplicantIncome, LoanAmount, Loan_Amount_Term, C...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#str(df)                                          
apply(is.na(df.loan),2, which) 
## $Loan_ID
## integer(0)
## 
## $Gender
##  [1]  24 127 172 189 315 335 461 468 478 508 577 589 593
## 
## $Married
## [1] 105 229 436
## 
## $Dependents
##  [1] 103 105 121 227 229 294 302 333 336 347 356 436 518 572 598
## 
## $Education
## integer(0)
## 
## $Self_Employed
##  [1]  12  20  25  30  31  96 108 112 115 159 171 219 232 237 269 296 334 337 345
## [20] 375 381 386 412 433 448 464 469 536 543 580 601 602
## 
## $ApplicantIncome
## integer(0)
## 
## $CoapplicantIncome
## integer(0)
## 
## $LoanAmount
##  [1]   1  36  64  82  96 103 104 114 128 203 285 306 323 339 388 436 438 480 525
## [20] 551 552 606
## 
## $Loan_Amount_Term
##  [1]  20  37  45  46  74 113 166 198 224 233 336 368 422 424
## 
## $Credit_History
##  [1]  17  25  31  43  80  84  87  96 118 126 130 131 157 182 188 199 220 237 238
## [20] 260 261 280 310 314 318 319 324 349 364 378 393 396 412 445 450 452 461 474
## [39] 491 492 498 504 507 531 534 545 557 566 584 601
## 
## $Property_Area
## integer(0)
## 
## $Loan_Status
## integer(0)
df.loan<-na.omit(df.loan)                              
head(df.loan,3)  
Cat1 <- table(df.loan$Gender)                    
Cat1  
## 
## Female   Male 
##     86    394
prop.table(table(df.loan$Gender)) 
## 
##    Female      Male 
## 0.1791667 0.8208333
  • Kategori Bivariat
library(readr)                                     
library(dplyr)                                    
library(magrittr)                                  
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
Cat2<- df.loan %>%                                 
select(Gender, Education) %>%                      
table()                                           
#prop.table()                                     
Cat2
##         Education
## Gender   Graduate Not Graduate
##   Female       73           13
##   Male        310           84
  • Kategori Multivariat
Cat3 <- df.loan %>%                                    
select(Gender, Education, Loan_Status) %>%             
#table()                                           
#prop.table()                                      
ftable()                                           
Cat3   
##                     Loan_Status   N   Y
## Gender Education                       
## Female Graduate                  28  45
##        Not Graduate               4   9
## Male   Graduate                  84 226
##        Not Graduate              32  52
  • Kuantitatif

    • Univariat numerik
# Keep only the numeric columns of df.loan for the quantitative summaries.
Quan.loan <- df.loan %>%
  select(where(is.numeric))
names(Quan.loan)
## [1] "ApplicantIncome"   "CoapplicantIncome" "LoanAmount"       
## [4] "Loan_Amount_Term"  "Credit_History"
mean(Quan.loan$LoanAmount) 
## [1] 144.7354
quantile(Quan.loan$LoanAmount)
##   0%  25%  50%  75% 100% 
##    9  100  128  170  600
median(Quan.loan$LoanAmount)
## [1] 128
# mode() only reports the storage mode of the vector ("numeric"). To get the
# statistical mode, tabulate the values and keep the most frequent one(s).
loan_amount_freq <- table(Quan.loan$LoanAmount)
as.numeric(names(loan_amount_freq)[loan_amount_freq == max(loan_amount_freq)])
## (output not shown: re-knit the document to display the modal LoanAmount)
summary(Quan.loan)
##  ApplicantIncome CoapplicantIncome   LoanAmount    Loan_Amount_Term
##  Min.   :  150   Min.   :    0     Min.   :  9.0   Min.   : 36.0   
##  1st Qu.: 2899   1st Qu.:    0     1st Qu.:100.0   1st Qu.:360.0   
##  Median : 3859   Median : 1084     Median :128.0   Median :360.0   
##  Mean   : 5364   Mean   : 1581     Mean   :144.7   Mean   :342.1   
##  3rd Qu.: 5852   3rd Qu.: 2253     3rd Qu.:170.0   3rd Qu.:360.0   
##  Max.   :81000   Max.   :33837     Max.   :600.0   Max.   :480.0   
##  Credit_History  
##  Min.   :0.0000  
##  1st Qu.:1.0000  
##  Median :1.0000  
##  Mean   :0.8542  
##  3rd Qu.:1.0000  
##  Max.   :1.0000
var(Quan.loan$LoanAmount)
## [1] 6481.565
IQR(Quan.loan$LoanAmount) 
## [1] 70
mad(Quan.loan$Credit_History)
## [1] 0
sd(Quan.loan$Credit_History)
## [1] 0.3533073
library(e1071)                                    
## Warning: package 'e1071' was built under R version 4.2.1
skewness(Quan.loan$LoanAmount)
## [1] 2.346698
kurtosis(Quan.loan$LoanAmount)
## [1] 8.354478
  • Bivariat numerik
cov(Quan.loan$LoanAmount,Quan.loan$Loan_Amount_Term)
## [1] 267.0571
cor(Quan.loan$LoanAmount,Quan.loan$Loan_Amount_Term)
## [1] 0.05086675
zscore=(Quan.loan$LoanAmount-mean(Quan.loan$LoanAmount))/sd(Quan.loan$LoanAmount)
  • Multivariat numerik
cov(Quan.loan)
##                   ApplicantIncome CoapplicantIncome    LoanAmount
## ApplicantIncome     32129072.2408     -1.670551e+06 226029.825404
## CoapplicantIncome   -1670550.7308      6.852313e+06  40197.560179
## LoanAmount            226029.8254      4.019756e+04   6481.564505
## Loan_Amount_Term       -4006.1953     -9.857739e+02    267.057098
## Credit_History          -112.4526     -8.038516e+00     -1.159751
##                   Loan_Amount_Term Credit_History
## ApplicantIncome      -4006.1953027   -112.4526357
## CoapplicantIncome     -985.7738706     -8.0385160
## LoanAmount             267.0570981     -1.1597512
## Loan_Amount_Term      4252.6572025      0.7588727
## Credit_History           0.7588727      0.1248260
cor(Quan.loan)
##                   ApplicantIncome CoapplicantIncome  LoanAmount
## ApplicantIncome        1.00000000      -0.112587969  0.49530959
## CoapplicantIncome     -0.11258797       1.000000000  0.19073974
## LoanAmount             0.49530959       0.190739737  1.00000000
## Loan_Amount_Term      -0.01083809      -0.005774688  0.05086675
## Credit_History        -0.05615235      -0.008691700 -0.04077297
##                   Loan_Amount_Term Credit_History
## ApplicantIncome       -0.010838092    -0.05615235
## CoapplicantIncome     -0.005774688    -0.00869170
## LoanAmount             0.050866753    -0.04077297
## Loan_Amount_Term       1.000000000     0.03293716
## Credit_History         0.032937159     1.00000000
  • EDA dengan cara Malas
library(funModeling) 
## Warning: package 'funModeling' was built under R version 4.2.1
## Loading required package: Hmisc
## Warning: package 'Hmisc' was built under R version 4.2.1
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following object is masked from 'package:e1071':
## 
##     impute
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## funModeling v.1.9.4 :)
## Examples and tutorials at livebook.datascienceheroes.com
##  / Now in Spanish: librovivodecienciadedatos.ai
library(tidyverse) 
library(Hmisc)
library(skimr)
## Warning: package 'skimr' was built under R version 4.2.1
# Run a quick exploratory pass over a data frame: structure, summary
# statistics, missing/zero counts, category frequencies, numeric profiling,
# numeric plots and a final description.
#
# dataloan: data frame to explore.
# Returns the result of describe(dataloan); the other steps are run for
# their printed or plotted output.
basic_eda <- function(dataloan) {
  glimpse(dataloan)
  # A value computed in the middle of a function body is not auto-printed,
  # so the summaries that are returned as objects are printed explicitly.
  print(skim(dataloan))
  df_status(dataloan)
  freq(dataloan)
  print(profiling_num(dataloan))
  plot_num(dataloan)
  describe(dataloan)
}
basic_eda(dataloan)
## Rows: 614
## Columns: 13
## $ Loan_ID           <chr> "LP001002", "LP001003", "LP001005", "LP001006", "LP0…
## $ Gender            <chr> "Male", "Male", "Male", "Male", "Male", "Male", "Mal…
## $ Married           <chr> "No", "Yes", "Yes", "Yes", "No", "Yes", "Yes", "Yes"…
## $ Dependents        <chr> "0", "1", "0", "0", "0", "2", "0", "3+", "2", "1", "…
## $ Education         <chr> "Graduate", "Graduate", "Graduate", "Not Graduate", …
## $ Self_Employed     <chr> "No", "No", "Yes", "No", "No", "Yes", "No", "No", "N…
## $ ApplicantIncome   <int> 5849, 4583, 3000, 2583, 6000, 5417, 2333, 3036, 4006…
## $ CoapplicantIncome <dbl> 0, 1508, 0, 2358, 0, 4196, 1516, 2504, 1526, 10968, …
## $ LoanAmount        <dbl> 0, 128, 66, 120, 141, 267, 95, 158, 168, 349, 70, 10…
## $ Loan_Amount_Term  <dbl> 360, 360, 360, 360, 360, 360, 360, 360, 360, 360, 36…
## $ Credit_History    <dbl> 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0…
## $ Property_Area     <chr> "Urban", "Rural", "Urban", "Urban", "Urban", "Urban"…
## $ Loan_Status       <chr> "Y", "N", "Y", "Y", "Y", "Y", "Y", "N", "Y", "N", "Y…
##             variable q_zeros p_zeros q_na p_na q_inf p_inf      type unique
## 1            Loan_ID       0    0.00    0    0     0     0 character    614
## 2             Gender      13    2.12    0    0     0     0 character      3
## 3            Married       3    0.49    0    0     0     0 character      3
## 4         Dependents     360   58.63    0    0     0     0 character      4
## 5          Education       0    0.00    0    0     0     0 character      2
## 6      Self_Employed      32    5.21    0    0     0     0 character      3
## 7    ApplicantIncome       0    0.00    0    0     0     0   integer    505
## 8  CoapplicantIncome     273   44.46    0    0     0     0   numeric    287
## 9         LoanAmount      22    3.58    0    0     0     0   numeric    204
## 10  Loan_Amount_Term      14    2.28    0    0     0     0   numeric     11
## 11    Credit_History     139   22.64    0    0     0     0   numeric      2
## 12     Property_Area       0    0.00    0    0     0     0 character      3
## 13       Loan_Status       0    0.00    0    0     0     0 character      2
## Warning in freq_logic(data = data, input = input[i], plot, na.rm, path_out =
## path_out): Skipping plot for variable 'Loan_ID' (more than 100 categories)
##      Loan_ID frequency percentage cumulative_perc
## 1   LP001002         1       0.16            0.16
## 2   LP001003         1       0.16            0.32
## 3   LP001005         1       0.16            0.48
## 4   LP001006         1       0.16            0.64
## 5   LP001008         1       0.16            0.80
## 6   LP001011         1       0.16            0.96
## 7   LP001013         1       0.16            1.12
## 8   LP001014         1       0.16            1.28
## 9   LP001018         1       0.16            1.44
## 10  LP001020         1       0.16            1.60
## 11  LP001024         1       0.16            1.76
## 12  LP001027         1       0.16            1.92
## 13  LP001028         1       0.16            2.08
## 14  LP001029         1       0.16            2.24
## 15  LP001030         1       0.16            2.40
## 16  LP001032         1       0.16            2.56
## 17  LP001034         1       0.16            2.72
## 18  LP001036         1       0.16            2.88
## 19  LP001038         1       0.16            3.04
## 20  LP001041         1       0.16            3.20
## 21  LP001043         1       0.16            3.36
## 22  LP001046         1       0.16            3.52
## 23  LP001047         1       0.16            3.68
## 24  LP001050         1       0.16            3.84
## 25  LP001052         1       0.16            4.00
## 26  LP001066         1       0.16            4.16
## 27  LP001068         1       0.16            4.32
## 28  LP001073         1       0.16            4.48
## 29  LP001086         1       0.16            4.64
## 30  LP001087         1       0.16            4.80
## 31  LP001091         1       0.16            4.96
## 32  LP001095         1       0.16            5.12
## 33  LP001097         1       0.16            5.28
## 34  LP001098         1       0.16            5.44
## 35  LP001100         1       0.16            5.60
## 36  LP001106         1       0.16            5.76
## 37  LP001109         1       0.16            5.92
## 38  LP001112         1       0.16            6.08
## 39  LP001114         1       0.16            6.24
## 40  LP001116         1       0.16            6.40
## 41  LP001119         1       0.16            6.56
## 42  LP001120         1       0.16            6.72
## 43  LP001123         1       0.16            6.88
## 44  LP001131         1       0.16            7.04
## 45  LP001136         1       0.16            7.20
## 46  LP001137         1       0.16            7.36
## 47  LP001138         1       0.16            7.52
## 48  LP001144         1       0.16            7.68
## 49  LP001146         1       0.16            7.84
## 50  LP001151         1       0.16            8.00
## 51  LP001155         1       0.16            8.16
## 52  LP001157         1       0.16            8.32
## 53  LP001164         1       0.16            8.48
## 54  LP001179         1       0.16            8.64
## 55  LP001186         1       0.16            8.80
## 56  LP001194         1       0.16            8.96
## 57  LP001195         1       0.16            9.12
## 58  LP001197         1       0.16            9.28
## 59  LP001198         1       0.16            9.44
## 60  LP001199         1       0.16            9.60
## 61  LP001205         1       0.16            9.76
## 62  LP001206         1       0.16            9.92
## 63  LP001207         1       0.16           10.08
## 64  LP001213         1       0.16           10.24
## 65  LP001222         1       0.16           10.40
## 66  LP001225         1       0.16           10.56
## 67  LP001228         1       0.16           10.72
## 68  LP001233         1       0.16           10.88
## 69  LP001238         1       0.16           11.04
## 70  LP001241         1       0.16           11.20
## 71  LP001243         1       0.16           11.36
## 72  LP001245         1       0.16           11.52
## 73  LP001248         1       0.16           11.68
## 74  LP001250         1       0.16           11.84
## 75  LP001253         1       0.16           12.00
## 76  LP001255         1       0.16           12.16
## 77  LP001256         1       0.16           12.32
## 78  LP001259         1       0.16           12.48
## 79  LP001263         1       0.16           12.64
## 80  LP001264         1       0.16           12.80
## 81  LP001265         1       0.16           12.96
## 82  LP001266         1       0.16           13.12
## 83  LP001267         1       0.16           13.28
## 84  LP001273         1       0.16           13.44
## 85  LP001275         1       0.16           13.60
## 86  LP001279         1       0.16           13.76
## 87  LP001280         1       0.16           13.92
## 88  LP001282         1       0.16           14.08
## 89  LP001289         1       0.16           14.24
## 90  LP001310         1       0.16           14.40
## 91  LP001316         1       0.16           14.56
## 92  LP001318         1       0.16           14.72
## 93  LP001319         1       0.16           14.88
## 94  LP001322         1       0.16           15.04
## 95  LP001325         1       0.16           15.20
## 96  LP001326         1       0.16           15.36
## 97  LP001327         1       0.16           15.52
## 98  LP001333         1       0.16           15.68
## 99  LP001334         1       0.16           15.84
## 100 LP001343         1       0.16           16.00
## 101 LP001345         1       0.16           16.16
## 102 LP001349         1       0.16           16.32
## 103 LP001350         1       0.16           16.48
## 104 LP001356         1       0.16           16.64
## 105 LP001357         1       0.16           16.80
## 106 LP001367         1       0.16           16.96
## 107 LP001369         1       0.16           17.12
## 108 LP001370         1       0.16           17.28
## 109 LP001379         1       0.16           17.44
## 110 LP001384         1       0.16           17.60
## 111 LP001385         1       0.16           17.76
## 112 LP001387         1       0.16           17.92
## 113 LP001391         1       0.16           18.08
## 114 LP001392         1       0.16           18.24
## 115 LP001398         1       0.16           18.40
## 116 LP001401         1       0.16           18.56
## 117 LP001404         1       0.16           18.72
## 118 LP001405         1       0.16           18.88
## 119 LP001421         1       0.16           19.04
## 120 LP001422         1       0.16           19.20
## 121 LP001426         1       0.16           19.36
## 122 LP001430         1       0.16           19.52
## 123 LP001431         1       0.16           19.68
## 124 LP001432         1       0.16           19.84
## 125 LP001439         1       0.16           20.00
## 126 LP001443         1       0.16           20.16
## 127 LP001448         1       0.16           20.32
## 128 LP001449         1       0.16           20.48
## 129 LP001451         1       0.16           20.64
## 130 LP001465         1       0.16           20.80
## 131 LP001469         1       0.16           20.96
## 132 LP001473         1       0.16           21.12
## 133 LP001478         1       0.16           21.28
## 134 LP001482         1       0.16           21.44
## 135 LP001487         1       0.16           21.60
## 136 LP001488         1       0.16           21.76
## 137 LP001489         1       0.16           21.92
## 138 LP001491         1       0.16           22.08
## 139 LP001492         1       0.16           22.24
## 140 LP001493         1       0.16           22.40
## 141 LP001497         1       0.16           22.56
## 142 LP001498         1       0.16           22.72
## 143 LP001504         1       0.16           22.88
## 144 LP001507         1       0.16           23.04
## 145 LP001508         1       0.16           23.20
## 146 LP001514         1       0.16           23.36
## 147 LP001516         1       0.16           23.52
## 148 LP001518         1       0.16           23.68
## 149 LP001519         1       0.16           23.84
## 150 LP001520         1       0.16           24.00
## 151 LP001528         1       0.16           24.16
## 152 LP001529         1       0.16           24.32
## 153 LP001531         1       0.16           24.48
## 154 LP001532         1       0.16           24.64
## 155 LP001535         1       0.16           24.80
## 156 LP001536         1       0.16           24.96
## 157 LP001541         1       0.16           25.12
## 158 LP001543         1       0.16           25.28
## 159 LP001546         1       0.16           25.44
## 160 LP001552         1       0.16           25.60
## 161 LP001560         1       0.16           25.76
## 162 LP001562         1       0.16           25.92
## 163 LP001565         1       0.16           26.08
## 164 LP001570         1       0.16           26.24
## 165 LP001572         1       0.16           26.40
## 166 LP001574         1       0.16           26.56
## 167 LP001577         1       0.16           26.72
## 168 LP001578         1       0.16           26.88
## 169 LP001579         1       0.16           27.04
## 170 LP001580         1       0.16           27.20
## 171 LP001581         1       0.16           27.36
## 172 LP001585         1       0.16           27.52
## 173 LP001586         1       0.16           27.68
## 174 LP001594         1       0.16           27.84
## 175 LP001603         1       0.16           28.00
## 176 LP001606         1       0.16           28.16
## 177 LP001608         1       0.16           28.32
## 178 LP001610         1       0.16           28.48
## 179 LP001616         1       0.16           28.64
## 180 LP001630         1       0.16           28.80
## 181 LP001633         1       0.16           28.96
## 182 LP001634         1       0.16           29.12
## 183 LP001636         1       0.16           29.28
## 184 LP001637         1       0.16           29.44
## 185 LP001639         1       0.16           29.60
## 186 LP001640         1       0.16           29.76
## 187 LP001641         1       0.16           29.92
## 188 LP001643         1       0.16           30.08
## 189 LP001644         1       0.16           30.24
## 190 LP001647         1       0.16           30.40
## 191 LP001653         1       0.16           30.56
## 192 LP001656         1       0.16           30.72
## 193 LP001657         1       0.16           30.88
## 194 LP001658         1       0.16           31.04
## 195 LP001664         1       0.16           31.20
## 196 LP001665         1       0.16           31.36
## 197 LP001666         1       0.16           31.52
## 198 LP001669         1       0.16           31.68
## 199 LP001671         1       0.16           31.84
## 200 LP001673         1       0.16           32.00
## 201 LP001674         1       0.16           32.16
## 202 LP001677         1       0.16           32.32
## 203 LP001682         1       0.16           32.48
## 204 LP001688         1       0.16           32.64
## 205 LP001691         1       0.16           32.80
## 206 LP001692         1       0.16           32.96
## 207 LP001693         1       0.16           33.12
## 208 LP001698         1       0.16           33.28
## 209 LP001699         1       0.16           33.44
## 210 LP001702         1       0.16           33.60
## 211 LP001708         1       0.16           33.76
## 212 LP001711         1       0.16           33.92
## 213 LP001713         1       0.16           34.08
## 214 LP001715         1       0.16           34.24
## 215 LP001716         1       0.16           34.40
## 216 LP001720         1       0.16           34.56
## 217 LP001722         1       0.16           34.72
## 218 LP001726         1       0.16           34.88
## 219 LP001732         1       0.16           35.04
## 220 LP001734         1       0.16           35.20
## 221 LP001736         1       0.16           35.36
## 222 LP001743         1       0.16           35.52
## 223 LP001744         1       0.16           35.68
## 224 LP001749         1       0.16           35.84
## 225 LP001750         1       0.16           36.00
## 226 LP001751         1       0.16           36.16
## 227 LP001754         1       0.16           36.32
## 228 LP001758         1       0.16           36.48
## 229 LP001760         1       0.16           36.64
## 230 LP001761         1       0.16           36.80
## 231 LP001765         1       0.16           36.96
## 232 LP001768         1       0.16           37.12
## 233 LP001770         1       0.16           37.28
## 234 LP001776         1       0.16           37.44
## 235 LP001778         1       0.16           37.60
## 236 LP001784         1       0.16           37.76
## 237 LP001786         1       0.16           37.92
## 238 LP001788         1       0.16           38.08
## 239 LP001790         1       0.16           38.24
## 240 LP001792         1       0.16           38.40
## 241 LP001798         1       0.16           38.56
## 242 LP001800         1       0.16           38.72
## 243 LP001806         1       0.16           38.88
## 244 LP001807         1       0.16           39.04
## 245 LP001811         1       0.16           39.20
## 246 LP001813         1       0.16           39.36
## 247 LP001814         1       0.16           39.52
## 248 LP001819         1       0.16           39.68
## 249 LP001824         1       0.16           39.84
## 250 LP001825         1       0.16           40.00
## 251 LP001835         1       0.16           40.16
## 252 LP001836         1       0.16           40.32
## 253 LP001841         1       0.16           40.48
## 254 LP001843         1       0.16           40.64
## 255 LP001844         1       0.16           40.80
## 256 LP001846         1       0.16           40.96
## 257 LP001849         1       0.16           41.12
## 258 LP001854         1       0.16           41.28
## 259 LP001859         1       0.16           41.44
## 260 LP001864         1       0.16           41.60
## 261 LP001865         1       0.16           41.76
## 262 LP001868         1       0.16           41.92
## 263 LP001870         1       0.16           42.08
## 264 LP001871         1       0.16           42.24
## 265 LP001872         1       0.16           42.40
## 266 LP001875         1       0.16           42.56
## 267 LP001877         1       0.16           42.72
## 268 LP001882         1       0.16           42.88
## 269 LP001883         1       0.16           43.04
## 270 LP001884         1       0.16           43.20
## 271 LP001888         1       0.16           43.36
## 272 LP001891         1       0.16           43.52
## 273 LP001892         1       0.16           43.68
## 274 LP001894         1       0.16           43.84
## 275 LP001896         1       0.16           44.00
## 276 LP001900         1       0.16           44.16
## 277 LP001903         1       0.16           44.32
## 278 LP001904         1       0.16           44.48
## 279 LP001907         1       0.16           44.64
## 280 LP001908         1       0.16           44.80
## 281 LP001910         1       0.16           44.96
## 282 LP001914         1       0.16           45.12
## 283 LP001915         1       0.16           45.28
## 284 LP001917         1       0.16           45.44
## 285 LP001922         1       0.16           45.60
## 286 LP001924         1       0.16           45.76
## 287 LP001925         1       0.16           45.92
## 288 LP001926         1       0.16           46.08
## 289 LP001931         1       0.16           46.24
## 290 LP001935         1       0.16           46.40
## 291 LP001936         1       0.16           46.56
## 292 LP001938         1       0.16           46.72
## 293 LP001940         1       0.16           46.88
## 294 LP001945         1       0.16           47.04
## 295 LP001947         1       0.16           47.20
## 296 LP001949         1       0.16           47.36
## 297 LP001953         1       0.16           47.52
## 298 LP001954         1       0.16           47.68
## 299 LP001955         1       0.16           47.84
## 300 LP001963         1       0.16           48.00
## 301 LP001964         1       0.16           48.16
## 302 LP001972         1       0.16           48.32
## 303 LP001974         1       0.16           48.48
## 304 LP001977         1       0.16           48.64
## 305 LP001978         1       0.16           48.80
## 306 LP001990         1       0.16           48.96
## 307 LP001993         1       0.16           49.12
## 308 LP001994         1       0.16           49.28
## 309 LP001996         1       0.16           49.44
## 310 LP001998         1       0.16           49.60
## 311 LP002002         1       0.16           49.76
## 312 LP002004         1       0.16           49.92
## 313 LP002006         1       0.16           50.08
## 314 LP002008         1       0.16           50.24
## 315 LP002024         1       0.16           50.40
## 316 LP002031         1       0.16           50.56
## 317 LP002035         1       0.16           50.72
## 318 LP002036         1       0.16           50.88
## 319 LP002043         1       0.16           51.04
## 320 LP002050         1       0.16           51.20
## 321 LP002051         1       0.16           51.36
## 322 LP002053         1       0.16           51.52
## 323 LP002054         1       0.16           51.68
## 324 LP002055         1       0.16           51.84
## 325 LP002065         1       0.16           52.00
## 326 LP002067         1       0.16           52.16
## 327 LP002068         1       0.16           52.32
## 328 LP002082         1       0.16           52.48
## 329 LP002086         1       0.16           52.64
## 330 LP002087         1       0.16           52.80
## 331 LP002097         1       0.16           52.96
## 332 LP002098         1       0.16           53.12
## 333 LP002100         1       0.16           53.28
## 334 LP002101         1       0.16           53.44
## 335 LP002103         1       0.16           53.60
## 336 LP002106         1       0.16           53.76
## 337 LP002110         1       0.16           53.92
## 338 LP002112         1       0.16           54.08
## 339 LP002113         1       0.16           54.24
## 340 LP002114         1       0.16           54.40
## 341 LP002115         1       0.16           54.56
## 342 LP002116         1       0.16           54.72
## 343 LP002119         1       0.16           54.88
## 344 LP002126         1       0.16           55.04
## 345 LP002128         1       0.16           55.20
## 346 LP002129         1       0.16           55.36
## 347 LP002130         1       0.16           55.52
## 348 LP002131         1       0.16           55.68
## 349 LP002137         1       0.16           55.84
## 350 LP002138         1       0.16           56.00
## 351 LP002139         1       0.16           56.16
## 352 LP002140         1       0.16           56.32
## 353 LP002141         1       0.16           56.48
## 354 LP002142         1       0.16           56.64
## 355 LP002143         1       0.16           56.80
## 356 LP002144         1       0.16           56.96
## 357 LP002149         1       0.16           57.12
## 358 LP002151         1       0.16           57.28
## 359 LP002158         1       0.16           57.44
## 360 LP002160         1       0.16           57.60
## 361 LP002161         1       0.16           57.76
## 362 LP002170         1       0.16           57.92
## 363 LP002175         1       0.16           58.08
## 364 LP002178         1       0.16           58.24
## 365 LP002180         1       0.16           58.40
## 366 LP002181         1       0.16           58.56
## 367 LP002187         1       0.16           58.72
## 368 LP002188         1       0.16           58.88
## 369 LP002190         1       0.16           59.04
## 370 LP002191         1       0.16           59.20
## 371 LP002194         1       0.16           59.36
## 372 LP002197         1       0.16           59.52
## 373 LP002201         1       0.16           59.68
## 374 LP002205         1       0.16           59.84
## 375 LP002209         1       0.16           60.00
## 376 LP002211         1       0.16           60.16
## 377 LP002219         1       0.16           60.32
## 378 LP002223         1       0.16           60.48
## 379 LP002224         1       0.16           60.64
## 380 LP002225         1       0.16           60.80
## 381 LP002226         1       0.16           60.96
## 382 LP002229         1       0.16           61.12
## 383 LP002231         1       0.16           61.28
## 384 LP002234         1       0.16           61.44
## 385 LP002236         1       0.16           61.60
## 386 LP002237         1       0.16           61.76
## 387 LP002239         1       0.16           61.92
## 388 LP002243         1       0.16           62.08
## 389 LP002244         1       0.16           62.24
## 390 LP002250         1       0.16           62.40
## 391 LP002255         1       0.16           62.56
## 392 LP002262         1       0.16           62.72
## 393 LP002263         1       0.16           62.88
## 394 LP002265         1       0.16           63.04
## 395 LP002266         1       0.16           63.20
## 396 LP002272         1       0.16           63.36
## 397 LP002277         1       0.16           63.52
## 398 LP002281         1       0.16           63.68
## 399 LP002284         1       0.16           63.84
## 400 LP002287         1       0.16           64.00
## 401 LP002288         1       0.16           64.16
## 402 LP002296         1       0.16           64.32
## 403 LP002297         1       0.16           64.48
## 404 LP002300         1       0.16           64.64
## 405 LP002301         1       0.16           64.80
## 406 LP002305         1       0.16           64.96
## 407 LP002308         1       0.16           65.12
## 408 LP002314         1       0.16           65.28
## 409 LP002315         1       0.16           65.44
## 410 LP002317         1       0.16           65.60
## 411 LP002318         1       0.16           65.76
## 412 LP002319         1       0.16           65.92
## 413 LP002328         1       0.16           66.08
## 414 LP002332         1       0.16           66.24
## 415 LP002335         1       0.16           66.40
## 416 LP002337         1       0.16           66.56
## 417 LP002341         1       0.16           66.72
## 418 LP002342         1       0.16           66.88
## 419 LP002345         1       0.16           67.04
## 420 LP002347         1       0.16           67.20
## 421 LP002348         1       0.16           67.36
## 422 LP002357         1       0.16           67.52
## 423 LP002361         1       0.16           67.68
## 424 LP002362         1       0.16           67.84
## 425 LP002364         1       0.16           68.00
## 426 LP002366         1       0.16           68.16
## 427 LP002367         1       0.16           68.32
## 428 LP002368         1       0.16           68.48
## 429 LP002369         1       0.16           68.64
## 430 LP002370         1       0.16           68.80
## 431 LP002377         1       0.16           68.96
## 432 LP002379         1       0.16           69.12
## 433 LP002386         1       0.16           69.28
## 434 LP002387         1       0.16           69.44
## 435 LP002390         1       0.16           69.60
## 436 LP002393         1       0.16           69.76
## 437 LP002398         1       0.16           69.92
## 438 LP002401         1       0.16           70.08
## 439 LP002403         1       0.16           70.24
## 440 LP002407         1       0.16           70.40
## 441 LP002408         1       0.16           70.56
## 442 LP002409         1       0.16           70.72
## 443 LP002418         1       0.16           70.88
## 444 LP002422         1       0.16           71.04
## 445 LP002424         1       0.16           71.20
## 446 LP002429         1       0.16           71.36
## 447 LP002434         1       0.16           71.52
## 448 LP002435         1       0.16           71.68
## 449 LP002443         1       0.16           71.84
## 450 LP002444         1       0.16           72.00
## 451 LP002446         1       0.16           72.16
## 452 LP002447         1       0.16           72.32
## 453 LP002448         1       0.16           72.48
## 454 LP002449         1       0.16           72.64
## 455 LP002453         1       0.16           72.80
## 456 LP002455         1       0.16           72.96
## 457 LP002459         1       0.16           73.12
## 458 LP002467         1       0.16           73.28
## 459 LP002472         1       0.16           73.44
## 460 LP002473         1       0.16           73.60
## 461 LP002478         1       0.16           73.76
## 462 LP002484         1       0.16           73.92
## 463 LP002487         1       0.16           74.08
## 464 LP002489         1       0.16           74.24
## 465 LP002493         1       0.16           74.40
## 466 LP002494         1       0.16           74.56
## 467 LP002500         1       0.16           74.72
## 468 LP002501         1       0.16           74.88
## 469 LP002502         1       0.16           75.04
## 470 LP002505         1       0.16           75.20
## 471 LP002515         1       0.16           75.36
## 472 LP002517         1       0.16           75.52
## 473 LP002519         1       0.16           75.68
## 474 LP002522         1       0.16           75.84
## 475 LP002524         1       0.16           76.00
## 476 LP002527         1       0.16           76.16
## 477 LP002529         1       0.16           76.32
## 478 LP002530         1       0.16           76.48
## 479 LP002531         1       0.16           76.64
## 480 LP002533         1       0.16           76.80
## 481 LP002534         1       0.16           76.96
## 482 LP002536         1       0.16           77.12
## 483 LP002537         1       0.16           77.28
## 484 LP002541         1       0.16           77.44
## 485 LP002543         1       0.16           77.60
## 486 LP002544         1       0.16           77.76
## 487 LP002545         1       0.16           77.92
## 488 LP002547         1       0.16           78.08
## 489 LP002555         1       0.16           78.24
## 490 LP002556         1       0.16           78.40
## 491 LP002560         1       0.16           78.56
## 492 LP002562         1       0.16           78.72
## 493 LP002571         1       0.16           78.88
## 494 LP002582         1       0.16           79.04
## 495 LP002585         1       0.16           79.20
## 496 LP002586         1       0.16           79.36
## 497 LP002587         1       0.16           79.52
## 498 LP002588         1       0.16           79.68
## 499 LP002600         1       0.16           79.84
## 500 LP002602         1       0.16           80.00
## 501 LP002603         1       0.16           80.16
## 502 LP002606         1       0.16           80.32
## 503 LP002615         1       0.16           80.48
## 504 LP002618         1       0.16           80.64
## 505 LP002619         1       0.16           80.80
## 506 LP002622         1       0.16           80.96
## 507 LP002624         1       0.16           81.12
## 508 LP002625         1       0.16           81.28
## 509 LP002626         1       0.16           81.44
## 510 LP002634         1       0.16           81.60
## 511 LP002637         1       0.16           81.76
## 512 LP002640         1       0.16           81.92
## 513 LP002643         1       0.16           82.08
## 514 LP002648         1       0.16           82.24
## 515 LP002652         1       0.16           82.40
## 516 LP002659         1       0.16           82.56
## 517 LP002670         1       0.16           82.72
## 518 LP002682         1       0.16           82.88
## 519 LP002683         1       0.16           83.04
## 520 LP002684         1       0.16           83.20
## 521 LP002689         1       0.16           83.36
## 522 LP002690         1       0.16           83.52
## 523 LP002692         1       0.16           83.68
## 524 LP002693         1       0.16           83.84
## 525 LP002697         1       0.16           84.00
## 526 LP002699         1       0.16           84.16
## 527 LP002705         1       0.16           84.32
## 528 LP002706         1       0.16           84.48
## 529 LP002714         1       0.16           84.64
## 530 LP002716         1       0.16           84.80
## 531 LP002717         1       0.16           84.96
## 532 LP002720         1       0.16           85.12
## 533 LP002723         1       0.16           85.28
## 534 LP002729         1       0.16           85.44
## 535 LP002731         1       0.16           85.60
## 536 LP002732         1       0.16           85.76
## 537 LP002734         1       0.16           85.92
## 538 LP002738         1       0.16           86.08
## 539 LP002739         1       0.16           86.24
## 540 LP002740         1       0.16           86.40
## 541 LP002741         1       0.16           86.56
## 542 LP002743         1       0.16           86.72
## 543 LP002753         1       0.16           86.88
## 544 LP002755         1       0.16           87.04
## 545 LP002757         1       0.16           87.20
## 546 LP002767         1       0.16           87.36
## 547 LP002768         1       0.16           87.52
## 548 LP002772         1       0.16           87.68
## 549 LP002776         1       0.16           87.84
## 550 LP002777         1       0.16           88.00
## 551 LP002778         1       0.16           88.16
## 552 LP002784         1       0.16           88.32
## 553 LP002785         1       0.16           88.48
## 554 LP002788         1       0.16           88.64
## 555 LP002789         1       0.16           88.80
## 556 LP002792         1       0.16           88.96
## 557 LP002794         1       0.16           89.12
## 558 LP002795         1       0.16           89.28
## 559 LP002798         1       0.16           89.44
## 560 LP002804         1       0.16           89.60
## 561 LP002807         1       0.16           89.76
## 562 LP002813         1       0.16           89.92
## 563 LP002820         1       0.16           90.08
## 564 LP002821         1       0.16           90.24
## 565 LP002832         1       0.16           90.40
## 566 LP002833         1       0.16           90.56
## 567 LP002836         1       0.16           90.72
## 568 LP002837         1       0.16           90.88
## 569 LP002840         1       0.16           91.04
## 570 LP002841         1       0.16           91.20
## 571 LP002842         1       0.16           91.36
## 572 LP002847         1       0.16           91.52
## 573 LP002855         1       0.16           91.68
## 574 LP002862         1       0.16           91.84
## 575 LP002863         1       0.16           92.00
## 576 LP002868         1       0.16           92.16
## 577 LP002872         1       0.16           92.32
## 578 LP002874         1       0.16           92.48
## 579 LP002877         1       0.16           92.64
## 580 LP002888         1       0.16           92.80
## 581 LP002892         1       0.16           92.96
## 582 LP002893         1       0.16           93.12
## 583 LP002894         1       0.16           93.28
## 584 LP002898         1       0.16           93.44
## 585 LP002911         1       0.16           93.60
## 586 LP002912         1       0.16           93.76
## 587 LP002916         1       0.16           93.92
## 588 LP002917         1       0.16           94.08
## 589 LP002925         1       0.16           94.24
## 590 LP002926         1       0.16           94.40
## 591 LP002928         1       0.16           94.56
## 592 LP002931         1       0.16           94.72
## 593 LP002933         1       0.16           94.88
## 594 LP002936         1       0.16           95.04
## 595 LP002938         1       0.16           95.20
## 596 LP002940         1       0.16           95.36
## 597 LP002941         1       0.16           95.52
## 598 LP002943         1       0.16           95.68
## 599 LP002945         1       0.16           95.84
## 600 LP002948         1       0.16           96.00
## 601 LP002949         1       0.16           96.16
## 602 LP002950         1       0.16           96.32
## 603 LP002953         1       0.16           96.48
## 604 LP002958         1       0.16           96.64
## 605 LP002959         1       0.16           96.80
## 606 LP002960         1       0.16           96.96
## 607 LP002961         1       0.16           97.12
## 608 LP002964         1       0.16           97.28
## 609 LP002974         1       0.16           97.44
## 610 LP002978         1       0.16           97.60
## 611 LP002979         1       0.16           97.76
## 612 LP002983         1       0.16           97.92
## 613 LP002984         1       0.16           98.08
## 614 LP002990         1       0.16          100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
##   Gender frequency percentage cumulative_perc
## 1   Male       489      79.64           79.64
## 2 Female       112      18.24           97.88
## 3      0        13       2.12          100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

##   Married frequency percentage cumulative_perc
## 1     Yes       398      64.82           64.82
## 2      No       213      34.69           99.51
## 3       0         3       0.49          100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

##   Dependents frequency percentage cumulative_perc
## 1          0       360      58.63           58.63
## 2          1       102      16.61           75.24
## 3          2       101      16.45           91.69
## 4         3+        51       8.31          100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

##      Education frequency percentage cumulative_perc
## 1     Graduate       480      78.18           78.18
## 2 Not Graduate       134      21.82          100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

##   Self_Employed frequency percentage cumulative_perc
## 1            No       500      81.43           81.43
## 2           Yes        82      13.36           94.79
## 3             0        32       5.21          100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

##   Property_Area frequency percentage cumulative_perc
## 1     Semiurban       233      37.95           37.95
## 2         Urban       202      32.90           70.85
## 3         Rural       179      29.15          100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

##   Loan_Status frequency percentage cumulative_perc
## 1           Y       422      68.73           68.73
## 2           N       192      31.27          100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

## dataloan 
## 
##  13  Variables      614  Observations
## --------------------------------------------------------------------------------
## Loan_ID 
##        n  missing distinct 
##      614        0      614 
## 
## lowest : LP001002 LP001003 LP001005 LP001006 LP001008
## highest: LP002978 LP002979 LP002983 LP002984 LP002990
## --------------------------------------------------------------------------------
## Gender 
##        n  missing distinct 
##      614        0        3 
##                                
## Value           0 Female   Male
## Frequency      13    112    489
## Proportion  0.021  0.182  0.796
## --------------------------------------------------------------------------------
## Married 
##        n  missing distinct 
##      614        0        3 
##                             
## Value          0    No   Yes
## Frequency      3   213   398
## Proportion 0.005 0.347 0.648
## --------------------------------------------------------------------------------
## Dependents 
##        n  missing distinct 
##      614        0        4 
##                                   
## Value          0     1     2    3+
## Frequency    360   102   101    51
## Proportion 0.586 0.166 0.164 0.083
## --------------------------------------------------------------------------------
## Education 
##        n  missing distinct 
##      614        0        2 
##                                     
## Value          Graduate Not Graduate
## Frequency           480          134
## Proportion        0.782        0.218
## --------------------------------------------------------------------------------
## Self_Employed 
##        n  missing distinct 
##      614        0        3 
##                             
## Value          0    No   Yes
## Frequency     32   500    82
## Proportion 0.052 0.814 0.134
## --------------------------------------------------------------------------------
## ApplicantIncome 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      614        0      505        1     5403     4183     1898     2216 
##      .25      .50      .75      .90      .95 
##     2878     3812     5795     9460    14583 
## 
## lowest :   150   210   416   645   674, highest: 39147 39999 51763 63337 81000
## --------------------------------------------------------------------------------
## CoapplicantIncome 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      614        0      287    0.912     1621     2118        0        0 
##      .25      .50      .75      .90      .95 
##        0     1188     2297     3782     4997 
## 
## lowest :     0.00    16.12   189.00   240.00   242.00
## highest: 10968.00 11300.00 20000.00 33837.00 41667.00
## --------------------------------------------------------------------------------
## LoanAmount 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      614        0      204        1    141.2    84.09     38.6     63.6 
##      .25      .50      .75      .90      .95 
##     98.0    125.0    164.8    229.4    293.4 
## 
## lowest :   0   9  17  25  26, highest: 500 570 600 650 700
## --------------------------------------------------------------------------------
## Loan_Amount_Term 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      614        0       11     0.42    334.2    57.12      180      180 
##      .25      .50      .75      .90      .95 
##      360      360      360      360      360 
## 
## lowest :   0  12  36  60  84, highest: 180 240 300 360 480
##                                                                             
## Value          0    12    36    60    84   120   180   240   300   360   480
## Frequency     14     1     2     2     4     3    44     4    13   512    15
## Proportion 0.023 0.002 0.003 0.003 0.007 0.005 0.072 0.007 0.021 0.834 0.024
## --------------------------------------------------------------------------------
## Credit_History 
##        n  missing distinct     Info      Sum     Mean      Gmd 
##      614        0        2    0.525      475   0.7736   0.3508 
## 
## --------------------------------------------------------------------------------
## Property_Area 
##        n  missing distinct 
##      614        0        3 
##                                         
## Value          Rural Semiurban     Urban
## Frequency        179       233       202
## Proportion     0.292     0.379     0.329
## --------------------------------------------------------------------------------
## Loan_Status 
##        n  missing distinct 
##      614        0        2 
##                       
## Value          N     Y
## Frequency    192   422
## Proportion 0.313 0.687
## --------------------------------------------------------------------------------

Tugas 4

Lakukan pemeriksaan distribusi densitas menggunakan R dan Python pada setiap variabel kuantitatif dengan beberapa bagian sebagai berikut:

  • Univariat numerik
library(ggplot2)
# Density curve of each quantitative variable, drawn as one plot per variable.
ggplot(data = dataloan, mapping = aes(x = ApplicantIncome)) +
  geom_density()

ggplot(data = dataloan, mapping = aes(x = CoapplicantIncome)) +
  geom_density()

ggplot(data = dataloan, mapping = aes(x = LoanAmount)) +
  geom_density()

  • Bivariat numerik
library(plotly)
## Warning: package 'plotly' was built under R version 4.2.1
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:Hmisc':
## 
##     subplot
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Applicant income vs. loan amount: semi-transparent points overlaid with
# 2D density contours, then converted to an interactive plotly widget.
bivariat <- ggplot(
  data = dataloan,
  mapping = aes(x = ApplicantIncome, y = LoanAmount)
) +
  geom_point(alpha = 0.5) +
  geom_density_2d()
ggplotly(bivariat)

# Same layout for applicant income vs. co-applicant income.
bivariat2 <- ggplot(
  data = dataloan,
  mapping = aes(x = ApplicantIncome, y = CoapplicantIncome)
) +
  geom_point(alpha = 0.5) +
  geom_density_2d()
ggplotly(bivariat2)
  • Multivariat numerik
library(GGally)
## Warning: package 'GGally' was built under R version 4.2.1
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## 
## Attaching package: 'GGally'
## The following object is masked from 'package:funModeling':
## 
##     range01
# Pairwise plot matrix over every column of dataloan_num.
# NOTE(review): dataloan_num is built earlier in the file; presumably it holds
# only the numeric columns of dataloan -- confirm.
ggpairs(data = dataloan_num)

Tugas 5

Lakukan proses pengujian Hipotesis menggunakan R dan Python pada setiap variabel kuantitatif dengan beberapa bagian sebagai berikut:

  • Hitunglah margin of error dan estimasi interval untuk proporsi peminjam berjenis kelamin perempuan pada tingkat kepercayaan 95%.
# 95% confidence interval for the proportion of female borrowers,
# computed on the rows left after na.omit() (dataloan.clean).
k <- sum(dataloan.clean$Gender == "Female")  # number of female borrowers
n <- nrow(dataloan.clean)                    # number of observations
pbar <- k / n                                # sample proportion
# Standard error of the sample proportion.
SE <- sqrt(pbar * (1 - pbar) / n)
SE
## [1] 0.01750393
# Margin of error: the interval is two-sided, so use the 97.5th normal percentile.
E <- qnorm(0.975) * SE
E
## [1] 0.03430707
# Interval estimate: sample proportion plus/minus the margin of error.
pbar + c(-E, E)
## [1] 0.1448596 0.2134737
  • Jika Anda berencana menggunakan perkiraan proporsi 50% data konsumen berjenis kelamin perempuan, temukan ukuran sampel yang diperlukan untuk mencapai margin kesalahan 5% untuk data observasi pada tingkat kepercayaan 95%.
# Sample size needed to estimate a proportion with a given margin of error:
#   n = zstar^2 * p * (1 - p) / E^2
zstar <- qnorm(0.975)  # two-sided 95% confidence level
p <- 0.5               # planned proportion
E <- 0.05              # target margin of error
n.raw <- zstar^2 * p * (1 - p) / E^2
n.raw
## [1] 384.1459
# A sample size must be a whole number of observations; round up so the
# achieved margin of error does not exceed the target.
ceiling(n.raw)
## [1] 385
  • Lakukan pembuktian kebenaran asumsi dengan tingkat signifikansi 0.05, jika Bank mengklaim bahwa pinjaman rata-rata konsumen adalah:
# Fix the random seed so the 30-row sample drawn below is reproducible.
set.seed(seed = 100)
# Draw 30 rows from the NA-free data; the tests below use this sample.
Data1 <- sample_n(tbl = dataloan.clean, size = 30)
Data1
  • Lebih besar $ 150.
# One-sample t statistic for the mean loan amount against mu0 = 150.
# The population standard deviation is unknown here, so the sample sd is used.
mu0 <- 150                       # hypothesized mean
xbar <- mean(Data1$LoanAmount)   # sample mean
s <- sd(Data1$LoanAmount)        # sample standard deviation
n <- nrow(Data1)                 # sample size
t <- (xbar - mu0) / (s / sqrt(n))
t
## [1] -2.243642
# Upper-tail critical value at the 5% significance level, n - 1 degrees of freedom.
alpha <- 0.05
t.alpha <- qt(1 - alpha, df = n - 1)
t.alpha
## [1] 1.699127
  • Lebih kecil $ 150
# One-sample t statistic for the mean loan amount against mu0 = 150.
# The population standard deviation is unknown here, so the sample sd is used.
mu0 <- 150                       # hypothesized mean
xbar <- mean(Data1$LoanAmount)   # sample mean
s <- sd(Data1$LoanAmount)        # sample standard deviation
n <- nrow(Data1)                 # sample size
t <- (xbar - mu0) / (s / sqrt(n))
t
## [1] -2.243642
# Lower-tail critical value at the 5% significance level: the negated
# upper 5% quantile of the t distribution with n - 1 degrees of freedom.
alpha <- 0.05
t.alpha <- qt(1 - alpha, df = n - 1)
-t.alpha
## [1] -1.699127
  • Sama dengan $ 150.
# Two-tailed one-sample t test of H0: mean loan amount equals mu0 = 150.
# The population standard deviation is unknown, so the sample sd is used.
mu0 <- 150                       # hypothesized mean
xbar <- mean(Data1$LoanAmount)   # sample mean
s <- sd(Data1$LoanAmount)        # sample standard deviation
n <- nrow(Data1)                 # sample size
t <- (xbar - mu0) / (s / sqrt(n))
t
## [1] -2.243642
# A two-tailed test splits alpha across both tails, so the critical values
# are the +/- (1 - alpha/2) quantiles, not the one-sided (1 - alpha) quantile.
alpha <- 0.05
t.half.alpha <- qt(1 - alpha / 2, df = n - 1)
t.half.alpha
## [1] 2.04523
-t.half.alpha
## [1] -2.04523
  • Lakukan pembuktian kebenaran asumsi dengan tingkat signifikansi 0.05, seperti di atas, jika diketahui simpangan baku pinjaman adalah $ 85.
# Test of the mean loan amount against mu0 = 150 when the population standard
# deviation is known (85). With a known sigma the test statistic is a z score
# and the critical values come from the standard normal distribution, not
# from the t distribution with n - 1 degrees of freedom.
mu0 <- 150                       # hypothesized mean
xbar <- mean(Data1$LoanAmount)   # sample mean
sigma <- 85                      # known population standard deviation
n <- nrow(Data1)                 # sample size
z <- (xbar - mu0) / (sigma / sqrt(n))
z
## [1] -1.615245
alpha <- 0.05
# One-tailed critical value: compare z with +z.alpha (upper tail) or
# -z.alpha (lower tail).
z.alpha <- qnorm(1 - alpha)
z.alpha
## [1] 1.644854
# Two-tailed critical value: compare z with +/- z.half.alpha.
z.half.alpha <- qnorm(1 - alpha / 2)
z.half.alpha
## [1] 1.959964