KOMPUTASI
~ Ujian Tengah Semester ~
Kontak | : \(\downarrow\) |
naufal3433@gmail.com | |
https://www.instagram.com/m_naufalardiansyah/ | |
RPubs | https://rpubs.com/muhammad_naufal/ |
Data Set
Kumpulan data yang akan Anda gunakan dalam ujian tengah semester ini adalah data konsumen yang melakukan pinjaman di suatu bank. Dataset ini memiliki 614 observasi dan 13 atribut sebagai berikut:
Tugas 1
Lakukan proses persiapan data dengan R dan Python, dengan beberapa langkah berikut:
- Import Data
# Load the loan data set; empty strings and the literal "NA" are both read as missing values.
dataloan <- read.csv("C:/logaritma/loan_train.csv", header = TRUE, na.strings = c("", "NA"))
dataloan
- Penanganan Data Hilang
colSums(is.na(dataloan))
## Loan_ID Gender Married Dependents
## 0 13 3 15
## Education Self_Employed ApplicantIncome CoapplicantIncome
## 0 32 0 0
## LoanAmount Loan_Amount_Term Credit_History Property_Area
## 22 14 50 0
## Loan_Status
## 0
# Keep only the rows that have no missing value in any column.
dataloan.clean <- na.omit(dataloan)
apply(is.na(dataloan),2, which)
## $Loan_ID
## integer(0)
##
## $Gender
## [1] 24 127 172 189 315 335 461 468 478 508 577 589 593
##
## $Married
## [1] 105 229 436
##
## $Dependents
## [1] 103 105 121 227 229 294 302 333 336 347 356 436 518 572 598
##
## $Education
## integer(0)
##
## $Self_Employed
## [1] 12 20 25 30 31 96 108 112 115 159 171 219 232 237 269 296 334 337 345
## [20] 375 381 386 412 433 448 464 469 536 543 580 601 602
##
## $ApplicantIncome
## integer(0)
##
## $CoapplicantIncome
## integer(0)
##
## $LoanAmount
## [1] 1 36 64 82 96 103 104 114 128 203 285 306 323 339 388 436 438 480 525
## [20] 551 552 606
##
## $Loan_Amount_Term
## [1] 20 37 45 46 74 113 166 198 224 233 336 368 422 424
##
## $Credit_History
## [1] 17 25 31 43 80 84 87 96 118 126 130 131 157 182 188 199 220 237 238
## [20] 260 261 280 310 314 318 319 324 349 364 378 393 396 412 445 450 452 461 474
## [39] 491 492 498 504 507 531 534 545 557 566 584 601
##
## $Property_Area
## integer(0)
##
## $Loan_Status
## integer(0)
# Replace every missing value with 0 (in character columns the value becomes "0"), then print the result.
dataloan[is.na(dataloan)] <- 0
dataloan
- Periksa Data Duplikat
# unique() on a data frame returns its de-duplicated rows, so the row counts must be
# compared; length() would return the number of columns instead.
# Loan_ID is different on every row, so no row can be a duplicate of another.
nrow(unique(dataloan.clean)) == nrow(dataloan.clean)
## [1] TRUE
- Pemisahan Data Kategori dan Numerik
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.1
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## Warning: package 'tibble' was built under R version 4.2.1
## Warning: package 'dplyr' was built under R version 4.2.1
## Warning: package 'stringr' was built under R version 4.2.1
## Warning: package 'forcats' was built under R version 4.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Character columns of the cleaned data are the categorical variables.
dataloan_kat <- select_if(dataloan.clean, is.character)
dataloan_kat
library(tidyverse)
# Numeric columns of the cleaned data are the quantitative variables.
dataloan_num <- select_if(dataloan.clean, is.numeric)
dataloan_num
- Penanganan Data Numerik
# Standardise every numeric column with scale(): centre on the column mean and
# divide by the column standard deviation.
standarisasi <- as.data.frame(lapply(dataloan_num, scale))
standarisasi
# Min-max normalisation: rescale a numeric vector linearly so that its minimum
# maps to 0 and its maximum maps to 1.
# x: numeric vector; min() and max() are called without na.rm, so a missing
#    value in x makes the whole result NA.
# Returns a numeric vector of the same length as x.
normalisasi <- function(x) {
  (x - min(x)) / (max(x) - min(x))
}
# Apply the min-max normalisation to every numeric column.
normalisasi1 <- as.data.frame(lapply(dataloan_num, normalisasi))
normalisasi1
# Robust scaling: centre a numeric vector on its median and divide by its
# interquartile range (Q3 - Q1).
# The former expression `(x - median) / Q3 - Q1` divided by Q3 alone and then
# subtracted Q1, because `/` binds tighter than `-`.
# x: numeric vector without missing values.
# Returns a numeric vector of the same length as x.
robust <- function(x) {
  (x - median(x)) / IQR(x)
}
# Apply the robust scaling to every numeric column.
robust_skala <- as.data.frame(lapply(dataloan_num, robust))
robust_skala
- Penanganan Data Pencilan
library(leaps)
## Warning: package 'leaps' was built under R version 4.2.1
# Compute the 1.5 * IQR fences of a numeric vector.
# x: numeric vector without missing values.
# The last statement is an assignment, so the function returns the lower fence
# (Q1 - 1.5 * IQR) invisibly; the upper fence is computed but not returned.
outliers <- function(x) {
  Q1 <- quantile(x, probs = .25)
  Q3 <- quantile(x, probs = .75)
  iqr <- IQR(x)

  upper_limit <- Q3 + (1.5 * iqr)
  lower_limit <- Q1 - (1.5 * iqr)
}
# Copy of the loan data used for the outlier search.
dpen <- dataloan

# Flag the values of a numeric vector that lie outside the 1.5 * IQR fences.
# x: numeric vector without missing values.
# Returns a logical vector, TRUE where x is above Q3 + 1.5 * IQR or below Q1 - 1.5 * IQR.
outliers <- function(x) {
  Q1 <- quantile(x, probs = .25)
  Q3 <- quantile(x, probs = .75)
  iqr <- Q3 - Q1

  upper_limit <- Q3 + (iqr * 1.5)
  lower_limit <- Q1 - (iqr * 1.5)

  x > upper_limit | x < lower_limit
}
# Rows that are outliers in each of the three amount columns.
outlier1 <- subset(dpen, outliers(dpen$LoanAmount))
outlier2 <- subset(dpen, outliers(dpen$ApplicantIncome))
outlier3 <- subset(dpen, outliers(dpen$CoapplicantIncome))

# Stack the three subsets and keep each row once, even if it was flagged in several columns.
dpenlier <- rbind(outlier1, outlier2, outlier3) %>% distinct()

dpenlier
- Penanganan Data Kategorikal
# Number of distinct values in every categorical column.
dataloan_kat %>% summarise_all(n_distinct)
# Encode every categorical column as a factor with numeric labels.
# factor() assigns the labels to the sorted distinct values, in order
# (for Gender: "Female" -> 0, "Male" -> 1).
GenderLabel <- factor(dataloan_kat$Gender, labels = c(0, 1))
MarriedLabel <- factor(dataloan_kat$Married, labels = c(0, 1))
DependentsLabel <- factor(dataloan_kat$Dependents, labels = c(0, 1, 2, 3))
EducationLabel <- factor(dataloan_kat$Education, labels = c(0, 1))
Self_EmployedLabel <- factor(dataloan_kat$Self_Employed, labels = c(0, 1))
Property_AreaLabel <- factor(dataloan_kat$Property_Area, labels = c(0, 1, 2))
Loan_StatusLabel <- factor(dataloan_kat$Loan_Status, labels = c(0, 1))

# Combine the encoded columns with the loan identifier.
dataloan_kat_labeled <- data.frame("ID" = dataloan_kat$Loan_ID, GenderLabel, MarriedLabel, DependentsLabel, EducationLabel, Self_EmployedLabel, Property_AreaLabel, Loan_StatusLabel)

dataloan_kat_labeled
Tugas 2
Lakukan Proses Visualisasi Data dengan menggunakan R dan Python dengan beberapa langkah berikut:
- Visualisasi Univariabel
Kategorik
library(ggplot2)

# Read the raw loan data once; every bar chart below uses the same data frame.
Loan_Train <- read.csv("C:/logaritma/loan_train.csv")

# Draw a frequency bar chart for one column of a data frame.
# data:    data frame that holds the column.
# column:  name of the column to count, given as a string.
# x_label: label of the x axis.
# title:   plot title.
# Returns the ggplot object; it is drawn when printed at top level.
plot_frequency <- function(data, column, x_label, title) {
  ggplot(data, aes(x = .data[[column]])) +
    geom_bar(fill = "cornflowerblue",
             color = "azure4") +
    theme_minimal() +
    labs(x = x_label,
         y = "Frequency",
         title = title)
}

plot_frequency(Loan_Train, "Gender", "gender", "Loan by Gender")
plot_frequency(Loan_Train, "Married", "Married", "Loan by Married")
plot_frequency(Loan_Train, "Dependents", "Dependents", "Loan by Dependents")
plot_frequency(Loan_Train, "Education", "Education", "Loan by Education")
plot_frequency(Loan_Train, "Property_Area", "Property_Area", "Loan by Property_Area")
plot_frequency(Loan_Train, "Loan_Status", "Loan_Status", "Loan by Loan Status")
plot_frequency(Loan_Train, "Credit_History", "Credit_History", "Credit History")
## Warning: Removed 50 rows containing non-finite values (stat_count).
Numerik
library(ggplot2)
library(scales)
## Warning: package 'scales' was built under R version 4.2.1
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
ggplot(Loan_Train,
aes(x = LoanAmount,
y= ..count.. / sum(..count..))) +
geom_histogram(fill = "cornflowerblue",
color = "white",
binwidth = 5) +
theme_minimal() +
labs(title="LoanAmount",
y = "Percent",
x = "LoanAmount") +
scale_y_continuous(labels = percent)
## Warning: Removed 22 rows containing non-finite values (stat_bin).
library(ggplot2)
library(scales)
ggplot(Loan_Train,
aes(x = Loan_Amount_Term,
y= ..count.. / sum(..count..))) +
geom_histogram(fill = "cornflowerblue",
color = "white",
binwidth = 5) +
theme_minimal() +
labs(title="Loan Amount Term",
y = "Percent",
x = "Loan_Amount_Term") +
scale_y_continuous(labels = percent)
## Warning: Removed 14 rows containing non-finite values (stat_bin).
library(ggplot2)
library(scales)
# Histogram of applicant income, with the share of observations on the y axis.
# Incomes range into the tens of thousands, so a bin width of 1000 is used;
# a width of 5 would spread the data over thousands of nearly empty bars.
ggplot(Loan_Train,
       aes(x = ApplicantIncome,
           y = ..count.. / sum(..count..))) +
  geom_histogram(fill = "cornflowerblue",
                 color = "white",
                 binwidth = 1000) +
  theme_minimal() +
  labs(title = "ApplicantIncome",
       y = "Percent",
       x = "ApplicantIncome") +
  scale_y_continuous(labels = percent)
library(ggplot2)
library(scales)
# Histogram of co-applicant income, with the share of observations on the y axis.
# Incomes range into the tens of thousands, so a bin width of 1000 is used;
# a width of 5 would spread the data over thousands of nearly empty bars.
ggplot(Loan_Train,
       aes(x = CoapplicantIncome,
           y = ..count.. / sum(..count..))) +
  geom_histogram(fill = "cornflowerblue",
                 color = "white",
                 binwidth = 1000) +
  theme_minimal() +
  labs(title = "CoapplicantIncome",
       y = "Percent",
       x = "CoapplicantIncome") +
  scale_y_continuous(labels = percent)
library(ggplot2)
library(scales)
# Histogram of credit history, with the share of observations on the y axis.
# Credit_History only takes the values 0 and 1, so the bin width is 1;
# a width of 5 would put both values into a single bar.
ggplot(Loan_Train,
       aes(x = Credit_History,
           y = ..count.. / sum(..count..))) +
  geom_histogram(fill = "cornflowerblue",
                 color = "white",
                 binwidth = 1) +
  theme_minimal() +
  labs(title = "Credit_History",
       y = "Percent",
       x = "Credit_History") +
  scale_y_continuous(labels = percent)
## Warning: Removed 50 rows containing non-finite values (stat_bin).
- Visualisasi Bivariabel
Categorical VS Categorical
library(ggplot2)

# Draw a dodged bar chart: counts of one categorical column, split by a second one.
# data:        data frame that holds both columns.
# x_column:    name of the column on the x axis, given as a string.
# fill_column: name of the column that sets the bar fill, given as a string.
# Returns the ggplot object; it is drawn when printed at top level.
plot_grouped_counts <- function(data, x_column, fill_column) {
  ggplot(data, aes(x = .data[[x_column]], fill = .data[[fill_column]])) +
    theme_minimal() +
    geom_bar(position = position_dodge(preserve = "single")) +
    # Label the axis and legend with the column names.
    labs(x = x_column, fill = fill_column)
}

plot_grouped_counts(Loan_Train, "Gender", "Education")
plot_grouped_counts(Loan_Train, "Gender", "Self_Employed")
plot_grouped_counts(Loan_Train, "Gender", "Property_Area")
plot_grouped_counts(Loan_Train, "Gender", "Loan_Status")
plot_grouped_counts(Loan_Train, "Married", "Education")
plot_grouped_counts(Loan_Train, "Married", "Self_Employed")
plot_grouped_counts(Loan_Train, "Married", "Loan_Status")
plot_grouped_counts(Loan_Train, "Education", "Property_Area")
plot_grouped_counts(Loan_Train, "Education", "Gender")
# Same pair as the first plot; kept so the number of figures is unchanged.
plot_grouped_counts(Loan_Train, "Gender", "Education")
plot_grouped_counts(Loan_Train, "Gender", "Married")
Continuous VS Continuous
library(ggplot2)
library(scales)
# Scatter plot of applicant income against loan amount.
# Loan_Train is the data frame read from loan_train.csv, so there is no data() call:
# data(Loan_Train, package = "carData") only raised a "data set not found" warning.
# The x axis uses ggplot2's default range; the former limits of 0-60 cut off
# most loan amounts. Together with the y limits they removed 579 rows.
# geom_point() still warns about rows with a missing LoanAmount or an income above the y limit.
ggplot(Loan_Train,
       aes(x = LoanAmount,
           y = ApplicantIncome)) +
  geom_point(color = "cornflowerblue",
             size = 2,
             alpha = .8) +
  scale_y_continuous(label = scales::dollar,
                     limits = c(0, 10000)) +
  theme_minimal() +
  labs(x = "LoanAmount",
       y = "ApplicantIncome",
       title = "",
       subtitle = "")
library(ggplot2)
library(scales)
# Scatter plot of co-applicant income against loan amount.
# Loan_Train is the data frame read from loan_train.csv, so there is no data() call:
# data(Loan_Train, package = "carData") only raised a "data set not found" warning.
# The x axis uses ggplot2's default range; the former limits of 0-60 cut off
# most loan amounts. Together with the y limits they removed 577 rows.
# geom_point() still warns about rows with a missing LoanAmount or an income above the y limit.
ggplot(Loan_Train,
       aes(x = LoanAmount,
           y = CoapplicantIncome)) +
  geom_point(color = "cornflowerblue",
             size = 2,
             alpha = .8) +
  scale_y_continuous(label = scales::dollar,
                     limits = c(0, 10000)) +
  theme_minimal() +
  labs(x = "LoanAmount",
       y = "CoapplicantIncome",
       title = "",
       subtitle = "")
library(ggplot2)
library(scales)
# Scatter plot of co-applicant income against applicant income.
# Loan_Train is the data frame read from loan_train.csv, so there is no data() call:
# data(Loan_Train, package = "carData") only raised a "data set not found" warning.
# The x axis uses ggplot2's default range and dollar labels; the former limits
# of 0-60 removed all 614 rows and left the plot empty.
# geom_point() still warns about rows whose co-applicant income is above the y limit.
ggplot(Loan_Train,
       aes(x = ApplicantIncome,
           y = CoapplicantIncome)) +
  geom_point(color = "cornflowerblue",
             size = 2,
             alpha = .8) +
  scale_y_continuous(label = scales::dollar,
                     limits = c(0, 10000)) +
  scale_x_continuous(label = scales::dollar) +
  theme_minimal() +
  labs(x = "ApplicantIncome",
       y = "CoapplicantIncome",
       title = "",
       subtitle = "")
library(ggplot2)
library(scales)
# Scatter plot of the loan term against the loan amount.
# Loan_Train is the data frame read from loan_train.csv, so there is no data() call:
# data(Loan_Train, package = "carData") only raised a "data set not found" warning.
# Both axes use ggplot2's defaults. The former x limits of 0-60 removed 577 rows,
# and the dollar labels on the y axis did not fit a loan term.
# geom_point() still warns about rows with a missing LoanAmount or Loan_Amount_Term.
ggplot(Loan_Train,
       aes(x = LoanAmount,
           y = Loan_Amount_Term)) +
  geom_point(color = "cornflowerblue",
             size = 2,
             alpha = .8) +
  theme_minimal() +
  labs(x = "LoanAmount",
       y = "Loan_Amount_Term",
       title = "",
       subtitle = "")
- Visualisasi Multivariabel
library(ggplot2)
ggplot(Loan_Train,
aes(x = LoanAmount,
y = ApplicantIncome,
color = Education,
shape = Gender)) +
geom_point(size = 3, alpha = .6) +
theme_minimal() +
labs(title = "")
## Warning: Removed 22 rows containing missing values (geom_point).
Tugas 3
Lakukan proses analisa data secara deskriptif menggunakan R dan Python dengan beberapa langkah berikut:
- Kualitatif
- Kategori Univariat
library(readr)
# Read the loan data as a tibble with readr.
df.loan <- read_csv("C:/logaritma/loan_train.csv")
## Rows: 614 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): Loan_ID, Gender, Married, Dependents, Education, Self_Employed, Pro...
## dbl (5): ApplicantIncome, CoapplicantIncome, LoanAmount, Loan_Amount_Term, C...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#str(df)
apply(is.na(df.loan),2, which)
## $Loan_ID
## integer(0)
##
## $Gender
## [1] 24 127 172 189 315 335 461 468 478 508 577 589 593
##
## $Married
## [1] 105 229 436
##
## $Dependents
## [1] 103 105 121 227 229 294 302 333 336 347 356 436 518 572 598
##
## $Education
## integer(0)
##
## $Self_Employed
## [1] 12 20 25 30 31 96 108 112 115 159 171 219 232 237 269 296 334 337 345
## [20] 375 381 386 412 433 448 464 469 536 543 580 601 602
##
## $ApplicantIncome
## integer(0)
##
## $CoapplicantIncome
## integer(0)
##
## $LoanAmount
## [1] 1 36 64 82 96 103 104 114 128 203 285 306 323 339 388 436 438 480 525
## [20] 551 552 606
##
## $Loan_Amount_Term
## [1] 20 37 45 46 74 113 166 198 224 233 336 368 422 424
##
## $Credit_History
## [1] 17 25 31 43 80 84 87 96 118 126 130 131 157 182 188 199 220 237 238
## [20] 260 261 280 310 314 318 319 324 349 364 378 393 396 412 445 450 452 461 474
## [39] 491 492 498 504 507 531 534 545 557 566 584 601
##
## $Property_Area
## integer(0)
##
## $Loan_Status
## integer(0)
# Drop every row that has a missing value, then show the first three rows.
df.loan <- na.omit(df.loan)
head(df.loan, 3)
# Frequency table of Gender.
Cat1 <- table(df.loan$Gender)
Cat1
##
## Female Male
## 86 394
prop.table(table(df.loan$Gender))
##
## Female Male
## 0.1791667 0.8208333
- Kategori Bivariat
library(readr)
library(dplyr)
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
# Two-way contingency table of Gender by Education.
Cat2 <- df.loan %>%
  select(Gender, Education) %>%
  table()
#prop.table()
Cat2
## Education
## Gender Graduate Not Graduate
## Female 73 13
## Male 310 84
- Kategori Multivariat
# Flat three-way contingency table of Gender, Education and Loan_Status.
Cat3 <- df.loan %>%
  select(Gender, Education, Loan_Status) %>%
  #table()
  #prop.table()
  ftable()
Cat3
## Loan_Status N Y
## Gender Education
## Female Graduate 28 45
## Not Graduate 4 9
## Male Graduate 84 226
## Not Graduate 32 52
Kuantitatif
- Univariat numerik
# Keep only the numeric columns for the quantitative summaries.
Quan.loan <- df.loan %>%
  select_if(is.numeric)
names(Quan.loan)
## [1] "ApplicantIncome" "CoapplicantIncome" "LoanAmount"
## [4] "Loan_Amount_Term" "Credit_History"
mean(Quan.loan$LoanAmount)
## [1] 144.7354
quantile(Quan.loan$LoanAmount)
## 0% 25% 50% 75% 100%
## 9 100 128 170 600
median(Quan.loan$LoanAmount)
## [1] 128
# base::mode() reports how the vector is stored ("numeric"); it is not the statistical mode.
mode(Quan.loan$LoanAmount)
## [1] "numeric"
# Statistical mode: the most frequent value(s) of a vector.
# x: atomic vector; missing values are ignored.
# Returns every value tied for the highest frequency, in order of first
# appearance, or an empty vector when x has no non-missing value.
stat_mode <- function(x) {
  x <- x[!is.na(x)]
  if (length(x) == 0) {
    return(x)
  }
  values <- unique(x)
  counts <- tabulate(match(x, values))
  values[counts == max(counts)]
}
stat_mode(Quan.loan$LoanAmount)
summary(Quan.loan)
## ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term
## Min. : 150 Min. : 0 Min. : 9.0 Min. : 36.0
## 1st Qu.: 2899 1st Qu.: 0 1st Qu.:100.0 1st Qu.:360.0
## Median : 3859 Median : 1084 Median :128.0 Median :360.0
## Mean : 5364 Mean : 1581 Mean :144.7 Mean :342.1
## 3rd Qu.: 5852 3rd Qu.: 2253 3rd Qu.:170.0 3rd Qu.:360.0
## Max. :81000 Max. :33837 Max. :600.0 Max. :480.0
## Credit_History
## Min. :0.0000
## 1st Qu.:1.0000
## Median :1.0000
## Mean :0.8542
## 3rd Qu.:1.0000
## Max. :1.0000
var(Quan.loan$LoanAmount)
## [1] 6481.565
IQR(Quan.loan$LoanAmount)
## [1] 70
mad(Quan.loan$Credit_History)
## [1] 0
sd(Quan.loan$Credit_History)
## [1] 0.3533073
library(e1071)
## Warning: package 'e1071' was built under R version 4.2.1
skewness(Quan.loan$LoanAmount)
## [1] 2.346698
kurtosis(Quan.loan$LoanAmount)
## [1] 8.354478
- Bivariat numerik
cov(Quan.loan$LoanAmount,Quan.loan$Loan_Amount_Term)
## [1] 267.0571
cor(Quan.loan$LoanAmount,Quan.loan$Loan_Amount_Term)
## [1] 0.05086675
# z-score of LoanAmount: distance from the mean in units of the standard deviation.
zscore <- (Quan.loan$LoanAmount - mean(Quan.loan$LoanAmount)) / sd(Quan.loan$LoanAmount)
- Multivariat numerik
cov(Quan.loan)
## ApplicantIncome CoapplicantIncome LoanAmount
## ApplicantIncome 32129072.2408 -1.670551e+06 226029.825404
## CoapplicantIncome -1670550.7308 6.852313e+06 40197.560179
## LoanAmount 226029.8254 4.019756e+04 6481.564505
## Loan_Amount_Term -4006.1953 -9.857739e+02 267.057098
## Credit_History -112.4526 -8.038516e+00 -1.159751
## Loan_Amount_Term Credit_History
## ApplicantIncome -4006.1953027 -112.4526357
## CoapplicantIncome -985.7738706 -8.0385160
## LoanAmount 267.0570981 -1.1597512
## Loan_Amount_Term 4252.6572025 0.7588727
## Credit_History 0.7588727 0.1248260
cor(Quan.loan)
## ApplicantIncome CoapplicantIncome LoanAmount
## ApplicantIncome 1.00000000 -0.112587969 0.49530959
## CoapplicantIncome -0.11258797 1.000000000 0.19073974
## LoanAmount 0.49530959 0.190739737 1.00000000
## Loan_Amount_Term -0.01083809 -0.005774688 0.05086675
## Credit_History -0.05615235 -0.008691700 -0.04077297
## Loan_Amount_Term Credit_History
## ApplicantIncome -0.010838092 -0.05615235
## CoapplicantIncome -0.005774688 -0.00869170
## LoanAmount 0.050866753 -0.04077297
## Loan_Amount_Term 1.000000000 0.03293716
## Credit_History 0.032937159 1.00000000
- EDA dengan cara Malas
library(funModeling)
## Warning: package 'funModeling' was built under R version 4.2.1
## Loading required package: Hmisc
## Warning: package 'Hmisc' was built under R version 4.2.1
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following object is masked from 'package:e1071':
##
## impute
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
## funModeling v.1.9.4 :)
## Examples and tutorials at livebook.datascienceheroes.com
## / Now in Spanish: librovivodecienciadedatos.ai
library(tidyverse)
library(Hmisc)
library(skimr)
## Warning: package 'skimr' was built under R version 4.2.1
# Run a quick exploratory pass over a data frame.
# dataloan: the data frame to explore.
# Calls, in order: glimpse() (dplyr), skim() (skimr), df_status(), freq(),
# profiling_num() and plot_num() (funModeling), and describe() (Hmisc).
# Only output that a helper prints or plots itself appears; the function
# returns the value of describe().
basic_eda <- function(dataloan) {
  glimpse(dataloan)
  skim(dataloan)
  df_status(dataloan)
  freq(dataloan)
  profiling_num(dataloan)
  plot_num(dataloan)
  describe(dataloan)
}
basic_eda(dataloan)
## Rows: 614
## Columns: 13
## $ Loan_ID <chr> "LP001002", "LP001003", "LP001005", "LP001006", "LP0…
## $ Gender <chr> "Male", "Male", "Male", "Male", "Male", "Male", "Mal…
## $ Married <chr> "No", "Yes", "Yes", "Yes", "No", "Yes", "Yes", "Yes"…
## $ Dependents <chr> "0", "1", "0", "0", "0", "2", "0", "3+", "2", "1", "…
## $ Education <chr> "Graduate", "Graduate", "Graduate", "Not Graduate", …
## $ Self_Employed <chr> "No", "No", "Yes", "No", "No", "Yes", "No", "No", "N…
## $ ApplicantIncome <int> 5849, 4583, 3000, 2583, 6000, 5417, 2333, 3036, 4006…
## $ CoapplicantIncome <dbl> 0, 1508, 0, 2358, 0, 4196, 1516, 2504, 1526, 10968, …
## $ LoanAmount <dbl> 0, 128, 66, 120, 141, 267, 95, 158, 168, 349, 70, 10…
## $ Loan_Amount_Term <dbl> 360, 360, 360, 360, 360, 360, 360, 360, 360, 360, 36…
## $ Credit_History <dbl> 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0…
## $ Property_Area <chr> "Urban", "Rural", "Urban", "Urban", "Urban", "Urban"…
## $ Loan_Status <chr> "Y", "N", "Y", "Y", "Y", "Y", "Y", "N", "Y", "N", "Y…
## variable q_zeros p_zeros q_na p_na q_inf p_inf type unique
## 1 Loan_ID 0 0.00 0 0 0 0 character 614
## 2 Gender 13 2.12 0 0 0 0 character 3
## 3 Married 3 0.49 0 0 0 0 character 3
## 4 Dependents 360 58.63 0 0 0 0 character 4
## 5 Education 0 0.00 0 0 0 0 character 2
## 6 Self_Employed 32 5.21 0 0 0 0 character 3
## 7 ApplicantIncome 0 0.00 0 0 0 0 integer 505
## 8 CoapplicantIncome 273 44.46 0 0 0 0 numeric 287
## 9 LoanAmount 22 3.58 0 0 0 0 numeric 204
## 10 Loan_Amount_Term 14 2.28 0 0 0 0 numeric 11
## 11 Credit_History 139 22.64 0 0 0 0 numeric 2
## 12 Property_Area 0 0.00 0 0 0 0 character 3
## 13 Loan_Status 0 0.00 0 0 0 0 character 2
## Warning in freq_logic(data = data, input = input[i], plot, na.rm, path_out =
## path_out): Skipping plot for variable 'Loan_ID' (more than 100 categories)
## Loan_ID frequency percentage cumulative_perc
## 1 LP001002 1 0.16 0.16
## 2 LP001003 1 0.16 0.32
## 3 LP001005 1 0.16 0.48
## 4 LP001006 1 0.16 0.64
## 5 LP001008 1 0.16 0.80
## 6 LP001011 1 0.16 0.96
## 7 LP001013 1 0.16 1.12
## 8 LP001014 1 0.16 1.28
## 9 LP001018 1 0.16 1.44
## 10 LP001020 1 0.16 1.60
## 11 LP001024 1 0.16 1.76
## 12 LP001027 1 0.16 1.92
## 13 LP001028 1 0.16 2.08
## 14 LP001029 1 0.16 2.24
## 15 LP001030 1 0.16 2.40
## 16 LP001032 1 0.16 2.56
## 17 LP001034 1 0.16 2.72
## 18 LP001036 1 0.16 2.88
## 19 LP001038 1 0.16 3.04
## 20 LP001041 1 0.16 3.20
## 21 LP001043 1 0.16 3.36
## 22 LP001046 1 0.16 3.52
## 23 LP001047 1 0.16 3.68
## 24 LP001050 1 0.16 3.84
## 25 LP001052 1 0.16 4.00
## 26 LP001066 1 0.16 4.16
## 27 LP001068 1 0.16 4.32
## 28 LP001073 1 0.16 4.48
## 29 LP001086 1 0.16 4.64
## 30 LP001087 1 0.16 4.80
## 31 LP001091 1 0.16 4.96
## 32 LP001095 1 0.16 5.12
## 33 LP001097 1 0.16 5.28
## 34 LP001098 1 0.16 5.44
## 35 LP001100 1 0.16 5.60
## 36 LP001106 1 0.16 5.76
## 37 LP001109 1 0.16 5.92
## 38 LP001112 1 0.16 6.08
## 39 LP001114 1 0.16 6.24
## 40 LP001116 1 0.16 6.40
## 41 LP001119 1 0.16 6.56
## 42 LP001120 1 0.16 6.72
## 43 LP001123 1 0.16 6.88
## 44 LP001131 1 0.16 7.04
## 45 LP001136 1 0.16 7.20
## 46 LP001137 1 0.16 7.36
## 47 LP001138 1 0.16 7.52
## 48 LP001144 1 0.16 7.68
## 49 LP001146 1 0.16 7.84
## 50 LP001151 1 0.16 8.00
## 51 LP001155 1 0.16 8.16
## 52 LP001157 1 0.16 8.32
## 53 LP001164 1 0.16 8.48
## 54 LP001179 1 0.16 8.64
## 55 LP001186 1 0.16 8.80
## 56 LP001194 1 0.16 8.96
## 57 LP001195 1 0.16 9.12
## 58 LP001197 1 0.16 9.28
## 59 LP001198 1 0.16 9.44
## 60 LP001199 1 0.16 9.60
## 61 LP001205 1 0.16 9.76
## 62 LP001206 1 0.16 9.92
## 63 LP001207 1 0.16 10.08
## 64 LP001213 1 0.16 10.24
## 65 LP001222 1 0.16 10.40
## 66 LP001225 1 0.16 10.56
## 67 LP001228 1 0.16 10.72
## 68 LP001233 1 0.16 10.88
## 69 LP001238 1 0.16 11.04
## 70 LP001241 1 0.16 11.20
## 71 LP001243 1 0.16 11.36
## 72 LP001245 1 0.16 11.52
## 73 LP001248 1 0.16 11.68
## 74 LP001250 1 0.16 11.84
## 75 LP001253 1 0.16 12.00
## 76 LP001255 1 0.16 12.16
## 77 LP001256 1 0.16 12.32
## 78 LP001259 1 0.16 12.48
## 79 LP001263 1 0.16 12.64
## 80 LP001264 1 0.16 12.80
## 81 LP001265 1 0.16 12.96
## 82 LP001266 1 0.16 13.12
## 83 LP001267 1 0.16 13.28
## 84 LP001273 1 0.16 13.44
## 85 LP001275 1 0.16 13.60
## 86 LP001279 1 0.16 13.76
## 87 LP001280 1 0.16 13.92
## 88 LP001282 1 0.16 14.08
## 89 LP001289 1 0.16 14.24
## 90 LP001310 1 0.16 14.40
## 91 LP001316 1 0.16 14.56
## 92 LP001318 1 0.16 14.72
## 93 LP001319 1 0.16 14.88
## 94 LP001322 1 0.16 15.04
## 95 LP001325 1 0.16 15.20
## 96 LP001326 1 0.16 15.36
## 97 LP001327 1 0.16 15.52
## 98 LP001333 1 0.16 15.68
## 99 LP001334 1 0.16 15.84
## 100 LP001343 1 0.16 16.00
## 101 LP001345 1 0.16 16.16
## 102 LP001349 1 0.16 16.32
## 103 LP001350 1 0.16 16.48
## 104 LP001356 1 0.16 16.64
## 105 LP001357 1 0.16 16.80
## 106 LP001367 1 0.16 16.96
## 107 LP001369 1 0.16 17.12
## 108 LP001370 1 0.16 17.28
## 109 LP001379 1 0.16 17.44
## 110 LP001384 1 0.16 17.60
## 111 LP001385 1 0.16 17.76
## 112 LP001387 1 0.16 17.92
## 113 LP001391 1 0.16 18.08
## 114 LP001392 1 0.16 18.24
## 115 LP001398 1 0.16 18.40
## 116 LP001401 1 0.16 18.56
## 117 LP001404 1 0.16 18.72
## 118 LP001405 1 0.16 18.88
## 119 LP001421 1 0.16 19.04
## 120 LP001422 1 0.16 19.20
## 121 LP001426 1 0.16 19.36
## 122 LP001430 1 0.16 19.52
## 123 LP001431 1 0.16 19.68
## 124 LP001432 1 0.16 19.84
## 125 LP001439 1 0.16 20.00
## 126 LP001443 1 0.16 20.16
## 127 LP001448 1 0.16 20.32
## 128 LP001449 1 0.16 20.48
## 129 LP001451 1 0.16 20.64
## 130 LP001465 1 0.16 20.80
## 131 LP001469 1 0.16 20.96
## 132 LP001473 1 0.16 21.12
## 133 LP001478 1 0.16 21.28
## 134 LP001482 1 0.16 21.44
## 135 LP001487 1 0.16 21.60
## 136 LP001488 1 0.16 21.76
## 137 LP001489 1 0.16 21.92
## 138 LP001491 1 0.16 22.08
## 139 LP001492 1 0.16 22.24
## 140 LP001493 1 0.16 22.40
## 141 LP001497 1 0.16 22.56
## 142 LP001498 1 0.16 22.72
## 143 LP001504 1 0.16 22.88
## 144 LP001507 1 0.16 23.04
## 145 LP001508 1 0.16 23.20
## 146 LP001514 1 0.16 23.36
## 147 LP001516 1 0.16 23.52
## 148 LP001518 1 0.16 23.68
## 149 LP001519 1 0.16 23.84
## 150 LP001520 1 0.16 24.00
## 151 LP001528 1 0.16 24.16
## 152 LP001529 1 0.16 24.32
## 153 LP001531 1 0.16 24.48
## 154 LP001532 1 0.16 24.64
## 155 LP001535 1 0.16 24.80
## 156 LP001536 1 0.16 24.96
## 157 LP001541 1 0.16 25.12
## 158 LP001543 1 0.16 25.28
## 159 LP001546 1 0.16 25.44
## 160 LP001552 1 0.16 25.60
## 161 LP001560 1 0.16 25.76
## 162 LP001562 1 0.16 25.92
## 163 LP001565 1 0.16 26.08
## 164 LP001570 1 0.16 26.24
## 165 LP001572 1 0.16 26.40
## 166 LP001574 1 0.16 26.56
## 167 LP001577 1 0.16 26.72
## 168 LP001578 1 0.16 26.88
## 169 LP001579 1 0.16 27.04
## 170 LP001580 1 0.16 27.20
## 171 LP001581 1 0.16 27.36
## 172 LP001585 1 0.16 27.52
## 173 LP001586 1 0.16 27.68
## 174 LP001594 1 0.16 27.84
## 175 LP001603 1 0.16 28.00
## 176 LP001606 1 0.16 28.16
## 177 LP001608 1 0.16 28.32
## 178 LP001610 1 0.16 28.48
## 179 LP001616 1 0.16 28.64
## 180 LP001630 1 0.16 28.80
## 181 LP001633 1 0.16 28.96
## 182 LP001634 1 0.16 29.12
## 183 LP001636 1 0.16 29.28
## 184 LP001637 1 0.16 29.44
## 185 LP001639 1 0.16 29.60
## 186 LP001640 1 0.16 29.76
## 187 LP001641 1 0.16 29.92
## 188 LP001643 1 0.16 30.08
## 189 LP001644 1 0.16 30.24
## 190 LP001647 1 0.16 30.40
## 191 LP001653 1 0.16 30.56
## 192 LP001656 1 0.16 30.72
## 193 LP001657 1 0.16 30.88
## 194 LP001658 1 0.16 31.04
## 195 LP001664 1 0.16 31.20
## 196 LP001665 1 0.16 31.36
## 197 LP001666 1 0.16 31.52
## 198 LP001669 1 0.16 31.68
## 199 LP001671 1 0.16 31.84
## 200 LP001673 1 0.16 32.00
## 201 LP001674 1 0.16 32.16
## 202 LP001677 1 0.16 32.32
## 203 LP001682 1 0.16 32.48
## 204 LP001688 1 0.16 32.64
## 205 LP001691 1 0.16 32.80
## 206 LP001692 1 0.16 32.96
## 207 LP001693 1 0.16 33.12
## 208 LP001698 1 0.16 33.28
## 209 LP001699 1 0.16 33.44
## 210 LP001702 1 0.16 33.60
## 211 LP001708 1 0.16 33.76
## 212 LP001711 1 0.16 33.92
## 213 LP001713 1 0.16 34.08
## 214 LP001715 1 0.16 34.24
## 215 LP001716 1 0.16 34.40
## 216 LP001720 1 0.16 34.56
## 217 LP001722 1 0.16 34.72
## 218 LP001726 1 0.16 34.88
## 219 LP001732 1 0.16 35.04
## 220 LP001734 1 0.16 35.20
## 221 LP001736 1 0.16 35.36
## 222 LP001743 1 0.16 35.52
## 223 LP001744 1 0.16 35.68
## 224 LP001749 1 0.16 35.84
## 225 LP001750 1 0.16 36.00
## 226 LP001751 1 0.16 36.16
## 227 LP001754 1 0.16 36.32
## 228 LP001758 1 0.16 36.48
## 229 LP001760 1 0.16 36.64
## 230 LP001761 1 0.16 36.80
## 231 LP001765 1 0.16 36.96
## 232 LP001768 1 0.16 37.12
## 233 LP001770 1 0.16 37.28
## 234 LP001776 1 0.16 37.44
## 235 LP001778 1 0.16 37.60
## 236 LP001784 1 0.16 37.76
## 237 LP001786 1 0.16 37.92
## 238 LP001788 1 0.16 38.08
## 239 LP001790 1 0.16 38.24
## 240 LP001792 1 0.16 38.40
## 241 LP001798 1 0.16 38.56
## 242 LP001800 1 0.16 38.72
## 243 LP001806 1 0.16 38.88
## 244 LP001807 1 0.16 39.04
## 245 LP001811 1 0.16 39.20
## 246 LP001813 1 0.16 39.36
## 247 LP001814 1 0.16 39.52
## 248 LP001819 1 0.16 39.68
## 249 LP001824 1 0.16 39.84
## 250 LP001825 1 0.16 40.00
## 251 LP001835 1 0.16 40.16
## 252 LP001836 1 0.16 40.32
## 253 LP001841 1 0.16 40.48
## 254 LP001843 1 0.16 40.64
## 255 LP001844 1 0.16 40.80
## 256 LP001846 1 0.16 40.96
## 257 LP001849 1 0.16 41.12
## 258 LP001854 1 0.16 41.28
## 259 LP001859 1 0.16 41.44
## 260 LP001864 1 0.16 41.60
## 261 LP001865 1 0.16 41.76
## 262 LP001868 1 0.16 41.92
## 263 LP001870 1 0.16 42.08
## 264 LP001871 1 0.16 42.24
## 265 LP001872 1 0.16 42.40
## 266 LP001875 1 0.16 42.56
## 267 LP001877 1 0.16 42.72
## 268 LP001882 1 0.16 42.88
## 269 LP001883 1 0.16 43.04
## 270 LP001884 1 0.16 43.20
## 271 LP001888 1 0.16 43.36
## 272 LP001891 1 0.16 43.52
## 273 LP001892 1 0.16 43.68
## 274 LP001894 1 0.16 43.84
## 275 LP001896 1 0.16 44.00
## 276 LP001900 1 0.16 44.16
## 277 LP001903 1 0.16 44.32
## 278 LP001904 1 0.16 44.48
## 279 LP001907 1 0.16 44.64
## 280 LP001908 1 0.16 44.80
## 281 LP001910 1 0.16 44.96
## 282 LP001914 1 0.16 45.12
## 283 LP001915 1 0.16 45.28
## 284 LP001917 1 0.16 45.44
## 285 LP001922 1 0.16 45.60
## 286 LP001924 1 0.16 45.76
## 287 LP001925 1 0.16 45.92
## 288 LP001926 1 0.16 46.08
## 289 LP001931 1 0.16 46.24
## 290 LP001935 1 0.16 46.40
## 291 LP001936 1 0.16 46.56
## 292 LP001938 1 0.16 46.72
## 293 LP001940 1 0.16 46.88
## 294 LP001945 1 0.16 47.04
## 295 LP001947 1 0.16 47.20
## 296 LP001949 1 0.16 47.36
## 297 LP001953 1 0.16 47.52
## 298 LP001954 1 0.16 47.68
## 299 LP001955 1 0.16 47.84
## 300 LP001963 1 0.16 48.00
## 301 LP001964 1 0.16 48.16
## 302 LP001972 1 0.16 48.32
## 303 LP001974 1 0.16 48.48
## 304 LP001977 1 0.16 48.64
## 305 LP001978 1 0.16 48.80
## 306 LP001990 1 0.16 48.96
## 307 LP001993 1 0.16 49.12
## 308 LP001994 1 0.16 49.28
## 309 LP001996 1 0.16 49.44
## 310 LP001998 1 0.16 49.60
## 311 LP002002 1 0.16 49.76
## 312 LP002004 1 0.16 49.92
## 313 LP002006 1 0.16 50.08
## 314 LP002008 1 0.16 50.24
## 315 LP002024 1 0.16 50.40
## 316 LP002031 1 0.16 50.56
## 317 LP002035 1 0.16 50.72
## 318 LP002036 1 0.16 50.88
## 319 LP002043 1 0.16 51.04
## 320 LP002050 1 0.16 51.20
## 321 LP002051 1 0.16 51.36
## 322 LP002053 1 0.16 51.52
## 323 LP002054 1 0.16 51.68
## 324 LP002055 1 0.16 51.84
## 325 LP002065 1 0.16 52.00
## 326 LP002067 1 0.16 52.16
## 327 LP002068 1 0.16 52.32
## 328 LP002082 1 0.16 52.48
## 329 LP002086 1 0.16 52.64
## 330 LP002087 1 0.16 52.80
## 331 LP002097 1 0.16 52.96
## 332 LP002098 1 0.16 53.12
## 333 LP002100 1 0.16 53.28
## 334 LP002101 1 0.16 53.44
## 335 LP002103 1 0.16 53.60
## 336 LP002106 1 0.16 53.76
## 337 LP002110 1 0.16 53.92
## 338 LP002112 1 0.16 54.08
## 339 LP002113 1 0.16 54.24
## 340 LP002114 1 0.16 54.40
## 341 LP002115 1 0.16 54.56
## 342 LP002116 1 0.16 54.72
## 343 LP002119 1 0.16 54.88
## 344 LP002126 1 0.16 55.04
## 345 LP002128 1 0.16 55.20
## 346 LP002129 1 0.16 55.36
## 347 LP002130 1 0.16 55.52
## 348 LP002131 1 0.16 55.68
## 349 LP002137 1 0.16 55.84
## 350 LP002138 1 0.16 56.00
## 351 LP002139 1 0.16 56.16
## 352 LP002140 1 0.16 56.32
## 353 LP002141 1 0.16 56.48
## 354 LP002142 1 0.16 56.64
## 355 LP002143 1 0.16 56.80
## 356 LP002144 1 0.16 56.96
## 357 LP002149 1 0.16 57.12
## 358 LP002151 1 0.16 57.28
## 359 LP002158 1 0.16 57.44
## 360 LP002160 1 0.16 57.60
## 361 LP002161 1 0.16 57.76
## 362 LP002170 1 0.16 57.92
## 363 LP002175 1 0.16 58.08
## 364 LP002178 1 0.16 58.24
## 365 LP002180 1 0.16 58.40
## 366 LP002181 1 0.16 58.56
## 367 LP002187 1 0.16 58.72
## 368 LP002188 1 0.16 58.88
## 369 LP002190 1 0.16 59.04
## 370 LP002191 1 0.16 59.20
## 371 LP002194 1 0.16 59.36
## 372 LP002197 1 0.16 59.52
## 373 LP002201 1 0.16 59.68
## 374 LP002205 1 0.16 59.84
## 375 LP002209 1 0.16 60.00
## 376 LP002211 1 0.16 60.16
## 377 LP002219 1 0.16 60.32
## 378 LP002223 1 0.16 60.48
## 379 LP002224 1 0.16 60.64
## 380 LP002225 1 0.16 60.80
## 381 LP002226 1 0.16 60.96
## 382 LP002229 1 0.16 61.12
## 383 LP002231 1 0.16 61.28
## 384 LP002234 1 0.16 61.44
## 385 LP002236 1 0.16 61.60
## 386 LP002237 1 0.16 61.76
## 387 LP002239 1 0.16 61.92
## 388 LP002243 1 0.16 62.08
## 389 LP002244 1 0.16 62.24
## 390 LP002250 1 0.16 62.40
## 391 LP002255 1 0.16 62.56
## 392 LP002262 1 0.16 62.72
## 393 LP002263 1 0.16 62.88
## 394 LP002265 1 0.16 63.04
## 395 LP002266 1 0.16 63.20
## 396 LP002272 1 0.16 63.36
## 397 LP002277 1 0.16 63.52
## 398 LP002281 1 0.16 63.68
## 399 LP002284 1 0.16 63.84
## 400 LP002287 1 0.16 64.00
## 401 LP002288 1 0.16 64.16
## 402 LP002296 1 0.16 64.32
## 403 LP002297 1 0.16 64.48
## 404 LP002300 1 0.16 64.64
## 405 LP002301 1 0.16 64.80
## 406 LP002305 1 0.16 64.96
## 407 LP002308 1 0.16 65.12
## 408 LP002314 1 0.16 65.28
## 409 LP002315 1 0.16 65.44
## 410 LP002317 1 0.16 65.60
## 411 LP002318 1 0.16 65.76
## 412 LP002319 1 0.16 65.92
## 413 LP002328 1 0.16 66.08
## 414 LP002332 1 0.16 66.24
## 415 LP002335 1 0.16 66.40
## 416 LP002337 1 0.16 66.56
## 417 LP002341 1 0.16 66.72
## 418 LP002342 1 0.16 66.88
## 419 LP002345 1 0.16 67.04
## 420 LP002347 1 0.16 67.20
## 421 LP002348 1 0.16 67.36
## 422 LP002357 1 0.16 67.52
## 423 LP002361 1 0.16 67.68
## 424 LP002362 1 0.16 67.84
## 425 LP002364 1 0.16 68.00
## 426 LP002366 1 0.16 68.16
## 427 LP002367 1 0.16 68.32
## 428 LP002368 1 0.16 68.48
## 429 LP002369 1 0.16 68.64
## 430 LP002370 1 0.16 68.80
## 431 LP002377 1 0.16 68.96
## 432 LP002379 1 0.16 69.12
## 433 LP002386 1 0.16 69.28
## 434 LP002387 1 0.16 69.44
## 435 LP002390 1 0.16 69.60
## 436 LP002393 1 0.16 69.76
## 437 LP002398 1 0.16 69.92
## 438 LP002401 1 0.16 70.08
## 439 LP002403 1 0.16 70.24
## 440 LP002407 1 0.16 70.40
## 441 LP002408 1 0.16 70.56
## 442 LP002409 1 0.16 70.72
## 443 LP002418 1 0.16 70.88
## 444 LP002422 1 0.16 71.04
## 445 LP002424 1 0.16 71.20
## 446 LP002429 1 0.16 71.36
## 447 LP002434 1 0.16 71.52
## 448 LP002435 1 0.16 71.68
## 449 LP002443 1 0.16 71.84
## 450 LP002444 1 0.16 72.00
## 451 LP002446 1 0.16 72.16
## 452 LP002447 1 0.16 72.32
## 453 LP002448 1 0.16 72.48
## 454 LP002449 1 0.16 72.64
## 455 LP002453 1 0.16 72.80
## 456 LP002455 1 0.16 72.96
## 457 LP002459 1 0.16 73.12
## 458 LP002467 1 0.16 73.28
## 459 LP002472 1 0.16 73.44
## 460 LP002473 1 0.16 73.60
## 461 LP002478 1 0.16 73.76
## 462 LP002484 1 0.16 73.92
## 463 LP002487 1 0.16 74.08
## 464 LP002489 1 0.16 74.24
## 465 LP002493 1 0.16 74.40
## 466 LP002494 1 0.16 74.56
## 467 LP002500 1 0.16 74.72
## 468 LP002501 1 0.16 74.88
## 469 LP002502 1 0.16 75.04
## 470 LP002505 1 0.16 75.20
## 471 LP002515 1 0.16 75.36
## 472 LP002517 1 0.16 75.52
## 473 LP002519 1 0.16 75.68
## 474 LP002522 1 0.16 75.84
## 475 LP002524 1 0.16 76.00
## 476 LP002527 1 0.16 76.16
## 477 LP002529 1 0.16 76.32
## 478 LP002530 1 0.16 76.48
## 479 LP002531 1 0.16 76.64
## 480 LP002533 1 0.16 76.80
## 481 LP002534 1 0.16 76.96
## 482 LP002536 1 0.16 77.12
## 483 LP002537 1 0.16 77.28
## 484 LP002541 1 0.16 77.44
## 485 LP002543 1 0.16 77.60
## 486 LP002544 1 0.16 77.76
## 487 LP002545 1 0.16 77.92
## 488 LP002547 1 0.16 78.08
## 489 LP002555 1 0.16 78.24
## 490 LP002556 1 0.16 78.40
## 491 LP002560 1 0.16 78.56
## 492 LP002562 1 0.16 78.72
## 493 LP002571 1 0.16 78.88
## 494 LP002582 1 0.16 79.04
## 495 LP002585 1 0.16 79.20
## 496 LP002586 1 0.16 79.36
## 497 LP002587 1 0.16 79.52
## 498 LP002588 1 0.16 79.68
## 499 LP002600 1 0.16 79.84
## 500 LP002602 1 0.16 80.00
## 501 LP002603 1 0.16 80.16
## 502 LP002606 1 0.16 80.32
## 503 LP002615 1 0.16 80.48
## 504 LP002618 1 0.16 80.64
## 505 LP002619 1 0.16 80.80
## 506 LP002622 1 0.16 80.96
## 507 LP002624 1 0.16 81.12
## 508 LP002625 1 0.16 81.28
## 509 LP002626 1 0.16 81.44
## 510 LP002634 1 0.16 81.60
## 511 LP002637 1 0.16 81.76
## 512 LP002640 1 0.16 81.92
## 513 LP002643 1 0.16 82.08
## 514 LP002648 1 0.16 82.24
## 515 LP002652 1 0.16 82.40
## 516 LP002659 1 0.16 82.56
## 517 LP002670 1 0.16 82.72
## 518 LP002682 1 0.16 82.88
## 519 LP002683 1 0.16 83.04
## 520 LP002684 1 0.16 83.20
## 521 LP002689 1 0.16 83.36
## 522 LP002690 1 0.16 83.52
## 523 LP002692 1 0.16 83.68
## 524 LP002693 1 0.16 83.84
## 525 LP002697 1 0.16 84.00
## 526 LP002699 1 0.16 84.16
## 527 LP002705 1 0.16 84.32
## 528 LP002706 1 0.16 84.48
## 529 LP002714 1 0.16 84.64
## 530 LP002716 1 0.16 84.80
## 531 LP002717 1 0.16 84.96
## 532 LP002720 1 0.16 85.12
## 533 LP002723 1 0.16 85.28
## 534 LP002729 1 0.16 85.44
## 535 LP002731 1 0.16 85.60
## 536 LP002732 1 0.16 85.76
## 537 LP002734 1 0.16 85.92
## 538 LP002738 1 0.16 86.08
## 539 LP002739 1 0.16 86.24
## 540 LP002740 1 0.16 86.40
## 541 LP002741 1 0.16 86.56
## 542 LP002743 1 0.16 86.72
## 543 LP002753 1 0.16 86.88
## 544 LP002755 1 0.16 87.04
## 545 LP002757 1 0.16 87.20
## 546 LP002767 1 0.16 87.36
## 547 LP002768 1 0.16 87.52
## 548 LP002772 1 0.16 87.68
## 549 LP002776 1 0.16 87.84
## 550 LP002777 1 0.16 88.00
## 551 LP002778 1 0.16 88.16
## 552 LP002784 1 0.16 88.32
## 553 LP002785 1 0.16 88.48
## 554 LP002788 1 0.16 88.64
## 555 LP002789 1 0.16 88.80
## 556 LP002792 1 0.16 88.96
## 557 LP002794 1 0.16 89.12
## 558 LP002795 1 0.16 89.28
## 559 LP002798 1 0.16 89.44
## 560 LP002804 1 0.16 89.60
## 561 LP002807 1 0.16 89.76
## 562 LP002813 1 0.16 89.92
## 563 LP002820 1 0.16 90.08
## 564 LP002821 1 0.16 90.24
## 565 LP002832 1 0.16 90.40
## 566 LP002833 1 0.16 90.56
## 567 LP002836 1 0.16 90.72
## 568 LP002837 1 0.16 90.88
## 569 LP002840 1 0.16 91.04
## 570 LP002841 1 0.16 91.20
## 571 LP002842 1 0.16 91.36
## 572 LP002847 1 0.16 91.52
## 573 LP002855 1 0.16 91.68
## 574 LP002862 1 0.16 91.84
## 575 LP002863 1 0.16 92.00
## 576 LP002868 1 0.16 92.16
## 577 LP002872 1 0.16 92.32
## 578 LP002874 1 0.16 92.48
## 579 LP002877 1 0.16 92.64
## 580 LP002888 1 0.16 92.80
## 581 LP002892 1 0.16 92.96
## 582 LP002893 1 0.16 93.12
## 583 LP002894 1 0.16 93.28
## 584 LP002898 1 0.16 93.44
## 585 LP002911 1 0.16 93.60
## 586 LP002912 1 0.16 93.76
## 587 LP002916 1 0.16 93.92
## 588 LP002917 1 0.16 94.08
## 589 LP002925 1 0.16 94.24
## 590 LP002926 1 0.16 94.40
## 591 LP002928 1 0.16 94.56
## 592 LP002931 1 0.16 94.72
## 593 LP002933 1 0.16 94.88
## 594 LP002936 1 0.16 95.04
## 595 LP002938 1 0.16 95.20
## 596 LP002940 1 0.16 95.36
## 597 LP002941 1 0.16 95.52
## 598 LP002943 1 0.16 95.68
## 599 LP002945 1 0.16 95.84
## 600 LP002948 1 0.16 96.00
## 601 LP002949 1 0.16 96.16
## 602 LP002950 1 0.16 96.32
## 603 LP002953 1 0.16 96.48
## 604 LP002958 1 0.16 96.64
## 605 LP002959 1 0.16 96.80
## 606 LP002960 1 0.16 96.96
## 607 LP002961 1 0.16 97.12
## 608 LP002964 1 0.16 97.28
## 609 LP002974 1 0.16 97.44
## 610 LP002978 1 0.16 97.60
## 611 LP002979 1 0.16 97.76
## 612 LP002983 1 0.16 97.92
## 613 LP002984 1 0.16 98.08
## 614 LP002990 1 0.16 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## Gender frequency percentage cumulative_perc
## 1 Male 489 79.64 79.64
## 2 Female 112 18.24 97.88
## 3 0 13 2.12 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## Married frequency percentage cumulative_perc
## 1 Yes 398 64.82 64.82
## 2 No 213 34.69 99.51
## 3 0 3 0.49 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## Dependents frequency percentage cumulative_perc
## 1 0 360 58.63 58.63
## 2 1 102 16.61 75.24
## 3 2 101 16.45 91.69
## 4 3+ 51 8.31 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## Education frequency percentage cumulative_perc
## 1 Graduate 480 78.18 78.18
## 2 Not Graduate 134 21.82 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## Self_Employed frequency percentage cumulative_perc
## 1 No 500 81.43 81.43
## 2 Yes 82 13.36 94.79
## 3 0 32 5.21 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## Property_Area frequency percentage cumulative_perc
## 1 Semiurban 233 37.95 37.95
## 2 Urban 202 32.90 70.85
## 3 Rural 179 29.15 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## Loan_Status frequency percentage cumulative_perc
## 1 Y 422 68.73 68.73
## 2 N 192 31.27 100.00
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## dataloan
##
## 13 Variables 614 Observations
## --------------------------------------------------------------------------------
## Loan_ID
## n missing distinct
## 614 0 614
##
## lowest : LP001002 LP001003 LP001005 LP001006 LP001008
## highest: LP002978 LP002979 LP002983 LP002984 LP002990
## --------------------------------------------------------------------------------
## Gender
## n missing distinct
## 614 0 3
##
## Value 0 Female Male
## Frequency 13 112 489
## Proportion 0.021 0.182 0.796
## --------------------------------------------------------------------------------
## Married
## n missing distinct
## 614 0 3
##
## Value 0 No Yes
## Frequency 3 213 398
## Proportion 0.005 0.347 0.648
## --------------------------------------------------------------------------------
## Dependents
## n missing distinct
## 614 0 4
##
## Value 0 1 2 3+
## Frequency 360 102 101 51
## Proportion 0.586 0.166 0.164 0.083
## --------------------------------------------------------------------------------
## Education
## n missing distinct
## 614 0 2
##
## Value Graduate Not Graduate
## Frequency 480 134
## Proportion 0.782 0.218
## --------------------------------------------------------------------------------
## Self_Employed
## n missing distinct
## 614 0 3
##
## Value 0 No Yes
## Frequency 32 500 82
## Proportion 0.052 0.814 0.134
## --------------------------------------------------------------------------------
## ApplicantIncome
## n missing distinct Info Mean Gmd .05 .10
## 614 0 505 1 5403 4183 1898 2216
## .25 .50 .75 .90 .95
## 2878 3812 5795 9460 14583
##
## lowest : 150 210 416 645 674, highest: 39147 39999 51763 63337 81000
## --------------------------------------------------------------------------------
## CoapplicantIncome
## n missing distinct Info Mean Gmd .05 .10
## 614 0 287 0.912 1621 2118 0 0
## .25 .50 .75 .90 .95
## 0 1188 2297 3782 4997
##
## lowest : 0.00 16.12 189.00 240.00 242.00
## highest: 10968.00 11300.00 20000.00 33837.00 41667.00
## --------------------------------------------------------------------------------
## LoanAmount
## n missing distinct Info Mean Gmd .05 .10
## 614 0 204 1 141.2 84.09 38.6 63.6
## .25 .50 .75 .90 .95
## 98.0 125.0 164.8 229.4 293.4
##
## lowest : 0 9 17 25 26, highest: 500 570 600 650 700
## --------------------------------------------------------------------------------
## Loan_Amount_Term
## n missing distinct Info Mean Gmd .05 .10
## 614 0 11 0.42 334.2 57.12 180 180
## .25 .50 .75 .90 .95
## 360 360 360 360 360
##
## lowest : 0 12 36 60 84, highest: 180 240 300 360 480
##
## Value 0 12 36 60 84 120 180 240 300 360 480
## Frequency 14 1 2 2 4 3 44 4 13 512 15
## Proportion 0.023 0.002 0.003 0.003 0.007 0.005 0.072 0.007 0.021 0.834 0.024
## --------------------------------------------------------------------------------
## Credit_History
## n missing distinct Info Sum Mean Gmd
## 614 0 2 0.525 475 0.7736 0.3508
##
## --------------------------------------------------------------------------------
## Property_Area
## n missing distinct
## 614 0 3
##
## Value Rural Semiurban Urban
## Frequency 179 233 202
## Proportion 0.292 0.379 0.329
## --------------------------------------------------------------------------------
## Loan_Status
## n missing distinct
## 614 0 2
##
## Value N Y
## Frequency 192 422
## Proportion 0.313 0.687
## --------------------------------------------------------------------------------
Tugas 4
Lakukan pemeriksaan distribusi densitas menggunakan R dan Python pada setiap variabel kuantitatif dengan beberapa bagian sebagai berikut:
- Univariat numerik
library(ggplot2)
# Univariate kernel density curves, one plot per numeric loan variable.
# NOTE(review): missing values in `dataloan` were replaced with 0 earlier in
# this document, so the placeholder zeros in LoanAmount are part of its curve.
ggplot(data = dataloan, mapping = aes(x = ApplicantIncome)) +
  geom_density()
ggplot(data = dataloan, mapping = aes(x = CoapplicantIncome)) +
  geom_density()
ggplot(data = dataloan, mapping = aes(x = LoanAmount)) +
  geom_density()
- Bivariat numerik
library(plotly)
## Warning: package 'plotly' was built under R version 4.2.1
##
## Attaching package: 'plotly'
## The following object is masked from 'package:Hmisc':
##
## subplot
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Scatter plot of loan amount against applicant income with 2-D density
# contours overlaid, then converted to an interactive plotly widget.
bivariat <- ggplot(dataloan, aes(x = ApplicantIncome, y = LoanAmount)) +
  geom_point(alpha = 0.5) +  # semi-transparent points so overlaps stay visible
  geom_density_2d()
ggplotly(bivariat)
# Scatter plot of co-applicant income against applicant income with 2-D
# density contours overlaid, then converted to an interactive plotly widget.
bivariat2 <- ggplot(dataloan, aes(x = ApplicantIncome, y = CoapplicantIncome)) +
  geom_point(alpha = 0.5) +  # semi-transparent points so overlaps stay visible
  geom_density_2d()
ggplotly(bivariat2)
- Multivariat numerik
library(GGally)
## Warning: package 'GGally' was built under R version 4.2.1
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
##
## Attaching package: 'GGally'
## The following object is masked from 'package:funModeling':
##
## range01
# Pairwise plot matrix for every column of `dataloan_num`.
# NOTE(review): `dataloan_num` is defined earlier in the document; presumably
# it holds only the numeric columns of `dataloan` -- confirm.
ggpairs(data = dataloan_num)
Tugas 5
Lakukan proses pengujian Hipotesis menggunakan R dan Python pada setiap variabel kuantitatif dengan beberapa bagian sebagai berikut:
- Hitunglah margin of error dan estimasi interval untuk proporsi peminjam berjenis kelamin perempuan pada tingkat kepercayaan 95%.
# Margin of error and 95% interval estimate for the proportion of female
# borrowers, computed on the complete-case data (`dataloan.clean`).
k <- sum(dataloan.clean$Gender == "Female")  # number of female borrowers
n <- nrow(dataloan.clean)                    # number of complete-case rows
pbar <- k / n                                # sample proportion
# Standard error of the sample proportion.
SE <- sqrt(pbar * (1 - pbar) / n); SE
## [1] 0.01750393
# Margin of error: 97.5th normal percentile (two-sided 95%) times the SE.
E <- qnorm(.975) * SE; E
## [1] 0.03430707
# Interval estimate: sample proportion minus/plus the margin of error.
pbar + c(-E, E)
## [1] 0.1448596 0.2134737
- Jika Anda berencana menggunakan perkiraan proporsi 50% data konsumen berjenis kelamin perempuan, temukan ukuran sampel yang diperlukan untuk mencapai margin kesalahan 5% untuk data observasi pada tingkat kepercayaan 95%.
# Sample size needed to estimate a proportion with a 5% margin of error at
# the 95% confidence level, using a planned proportion of 50%.
zstar <- qnorm(.975)  # 97.5th normal percentile (two-sided 95%)
p <- 0.5              # planned proportion
E <- 0.05             # target margin of error
# n = z^2 * p * (1 - p) / E^2; round the result up to the next whole
# observation (385) when reporting the required sample size.
zstar^2 * p * (1 - p) / E^2
## [1] 384.1459
- Lakukan pembuktian kebenaran asumsi dengan tingkat signifikansi 0.05, jika Bank mengklaim bahwa pinjaman rata-rata konsumen adalah:
# Draw a random sample of 30 rows from the complete-case data for the
# hypothesis tests that follow.
set.seed(100)  # fixed seed so the same rows are drawn on every run
Data1 <- sample_n(dataloan.clean, 30)
Data1
- Lebih besar $ 150.
# One-sample t statistic for the mean loan amount against $150, compared
# with the upper-tail critical value (claim: mean greater than $150).
mu0 <- 150                       # hypothesized mean loan amount
xbar <- mean(Data1$LoanAmount)   # sample mean
s <- sd(Data1$LoanAmount)        # sample standard deviation
n <- nrow(Data1)                 # sample size
# Test statistic: distance of the sample mean from mu0 in standard errors.
t <- (xbar - mu0) / (s / sqrt(n)); t
## [1] -2.243642
alpha <- .05
# Upper-tail critical value of Student's t with n - 1 degrees of freedom.
t.alpha <- qt(1 - alpha, df = n - 1)
t.alpha
## [1] 1.699127
- Lebih kecil $ 150
# One-sample t statistic for the mean loan amount against $150, compared
# with the lower-tail critical value (claim: mean smaller than $150).
mu0 <- 150                       # hypothesized mean loan amount
xbar <- mean(Data1$LoanAmount)   # sample mean
s <- sd(Data1$LoanAmount)        # sample standard deviation
n <- nrow(Data1)                 # sample size
# Test statistic: distance of the sample mean from mu0 in standard errors.
t <- (xbar - mu0) / (s / sqrt(n)); t
## [1] -2.243642
alpha <- .05
t.alpha <- qt(1 - alpha, df = n - 1)
# Lower-tail critical value is the negated upper-tail quantile.
-t.alpha
## [1] -1.699127
- Sama dengan $ 150.
# One-sample t statistic for the mean loan amount against $150, compared
# with the two-sided critical values (claim: mean equal to $150).
mu0 <- 150                       # hypothesized mean loan amount
xbar <- mean(Data1$LoanAmount)   # sample mean
s <- sd(Data1$LoanAmount)        # sample standard deviation
n <- nrow(Data1)                 # sample size
# Test statistic: distance of the sample mean from mu0 in standard errors.
t <- (xbar - mu0) / (s / sqrt(n)); t
## [1] -2.243642
alpha <- .05
# A two-sided test splits alpha across both tails, so the critical value is
# the (1 - alpha / 2) quantile rather than the one-sided (1 - alpha) quantile.
t.alpha <- qt(1 - alpha / 2, df = n - 1)
t.alpha
## [1] 2.04523
-t.alpha
## [1] -2.04523
- Lakukan pembuktian kebenaran asumsi dengan tingkat signifikansi 0.05, seperti di atas jika diketahui simpangan baku pinjaman adalah $ 85.
# Test of the mean loan amount against $150 when the standard deviation is
# given as $85, compared with the upper-tail critical value.
mu0 <- 150                       # hypothesized mean loan amount
xbar <- mean(Data1$LoanAmount)   # sample mean
sigma <- 85                      # standard deviation stated in the task
n <- nrow(Data1)                 # sample size
# With a known standard deviation the standardized mean is a z statistic.
z <- (xbar - mu0) / (sigma / sqrt(n)); z
## [1] -1.615245
alpha <- .05
# Critical value therefore comes from the standard normal distribution,
# not from Student's t with n - 1 degrees of freedom.
z.alpha <- qnorm(1 - alpha)
z.alpha
## [1] 1.644854