METODE STATISTIKA
~ Ujian Tengah Semester ~
| NIM | 20205520005 |
| Prodi | Teknik Informatika |
| veronica.ardilla@student.matanauniversity.ac.id | |
| RPubs | https://rpubs.com/veronicayose/ |
| Github | https://github.com/veronicayose/ |
Tugas 1
Lakukan proses persiapan data dengan R dan Python, dengan beberapa langkah berikut:
1.1 Import Data
x_train<-read.csv("loan-train.csv")
# Menampilkan enam baris pertama
head(x_train)
# Menampilkan enam baris terakhir
tail(x_train)
1.2 Penanganan Data Hilang
Untuk mengecek banyaknya data yang hilang
colSums(is.na(x_train))## Loan_ID Gender Married Dependents
## 0 0 0 0
## Education Self_Employed ApplicantIncome CoapplicantIncome
## 0 0 0 0
## LoanAmount Loan_Amount_Term Credit_History Property_Area
## 22 14 50 0
## Loan_Status
## 0
1.2.1 Dengan Cara Menghapus
colSums(is.na(na.omit(x_train)))## Loan_ID Gender Married Dependents
## 0 0 0 0
## Education Self_Employed ApplicantIncome CoapplicantIncome
## 0 0 0 0
## LoanAmount Loan_Amount_Term Credit_History Property_Area
## 0 0 0 0
## Loan_Status
## 0
1.2.2 Input Mean/Modus/Median
a. Mengisi numerik yang hilang dengan Mean
# Replace every missing LoanAmount with the mean of the observed values.
x_train$LoanAmount <- replace(
  x_train$LoanAmount,
  is.na(x_train$LoanAmount),
  mean(x_train$LoanAmount, na.rm = TRUE)
)
colSums(is.na(x_train))## Loan_ID Gender Married Dependents
## 0 0 0 0
## Education Self_Employed ApplicantIncome CoapplicantIncome
## 0 0 0 0
## LoanAmount Loan_Amount_Term Credit_History Property_Area
## 0 14 50 0
## Loan_Status
## 0
b. Mengisi numerik yang hilang dengan Modus
# Fill missing Loan_Amount_Term values with the statistical mode (the most
# frequent observed value).
# base::mode() reports the storage mode of an object (e.g. "numeric"), not the
# most frequent value, so the mode is taken from a frequency table instead.
# table() ignores NA by default, so only observed terms are counted.
term_counts <- table(x_train$Loan_Amount_Term)
x_train$Loan_Amount_Term[is.na(x_train$Loan_Amount_Term)] <-
  as.numeric(names(term_counts)[which.max(term_counts)])
colSums(is.na(x_train))## Loan_ID Gender Married Dependents
## 0 0 0 0
## Education Self_Employed ApplicantIncome CoapplicantIncome
## 0 0 0 0
## LoanAmount Loan_Amount_Term Credit_History Property_Area
## 0 0 50 0
## Loan_Status
## 0
1.2.3 Interpolasi Linear
library(zoo)
x_train<-read.csv("loan-train.csv")
#Interpolasi linear pada kolom Loan_Amount_Term
x_train$Loan_Amount_Term<-na.approx(x_train$Loan_Amount_Term)
colSums(is.na(x_train))## Loan_ID Gender Married Dependents
## 0 0 0 0
## Education Self_Employed ApplicantIncome CoapplicantIncome
## 0 0 0 0
## LoanAmount Loan_Amount_Term Credit_History Property_Area
## 22 0 50 0
## Loan_Status
## 0
1.2.4 Forward Filling
# Forward fill: each missing Credit_History takes the last observed value
# above it.
# library() stops with an error when a package is missing, whereas require()
# only returns FALSE and lets the script fail later with a confusing message.
library(tidyr)
library(dplyr)
x_train <- x_train %>% fill(Credit_History, .direction = "down")
colSums(is.na(x_train))## Loan_ID Gender Married Dependents
## 0 0 0 0
## Education Self_Employed ApplicantIncome CoapplicantIncome
## 0 0 0 0
## LoanAmount Loan_Amount_Term Credit_History Property_Area
## 22 0 0 0
## Loan_Status
## 0
1.2.5 Backward Filling
# Backward fill: each missing LoanAmount takes the next observed value below
# it.
# library() stops with an error when a package is missing, whereas require()
# only returns FALSE and lets the script fail later with a confusing message.
library(tidyr)
library(dplyr)
x_train <- x_train %>% fill(LoanAmount, .direction = "up")
colSums(is.na(x_train))## Loan_ID Gender Married Dependents
## 0 0 0 0
## Education Self_Employed ApplicantIncome CoapplicantIncome
## 0 0 0 0
## LoanAmount Loan_Amount_Term Credit_History Property_Area
## 0 0 0 0
## Loan_Status
## 0
1.3 Periksa Data Duplikat
x_train<-read.csv("loan-train.csv")
#Memeriksa nilai duplikat pada kolom ApplicantIncome
x_train %>% count(x_train$ApplicantIncome) %>% filter(n>1) %>% select(-n)#Untuk memeriksa banyaknya jumlah pada data yang terduplikat
#Catatan: n adalah jumlah yang terduplikat
x_train %>% count(x_train$ApplicantIncome) %>% filter(n>1)1.4 Pemisahan Data Kategori dan Numerik
1.4.1 Memilah data numerik
Filter(is.numeric, x_train)1.4.2 Memilah data kategorikal
Filter(is.character, x_train)1.5 Penanganan Data Numerik
1.5.1 Standardisasi
# Standardisation: add a centred and scaled copy of each numeric column.
x_train <- read.csv("loan-train.csv")
x_del <- na.omit(x_train) # drop rows that contain NA
x_del # show the cleaned data before the scaled columns are added
# scale() centres each column on its mean and divides by its standard
# deviation (its defaults).
x_del$ApplicantIncome_stan <- scale(x_del$ApplicantIncome)
x_del$CoapplicantIncome_stan <- scale(x_del$CoapplicantIncome)
x_del$LoanAmount_stan <- scale(x_del$LoanAmount)
x_del$Loan_Amount_Term_stan <- scale(x_del$Loan_Amount_Term)
x_del1.5.2 Normalisasi
x_train <- read.csv("loan-train.csv")
x_del <- na.omit(x_train) # menghilangkan na
#' Min-max normalise a numeric vector to the range [0, 1].
#'
#' @param x Numeric vector.
#' @param na.rm If TRUE, ignore NA when finding the minimum and maximum; the
#'   NA entries themselves stay NA in the result. Defaults to FALSE.
#' @return Numeric vector the same length as `x`, computed as
#'   (x - min) / (max - min). A constant vector yields NaN (0 / 0).
normalize <- function(x, na.rm = FALSE) {
  rng <- range(x, na.rm = na.rm)
  # The subtraction must be parenthesised on its own; previously only
  # min(x) was divided by the range, so values were merely shifted.
  (x - rng[1]) / (rng[2] - rng[1])
}
x_del # show the cleaned data before the normalised columns are added
# Add a min-max normalised copy of each numeric column.
x_del$ApplicantIncome_norm <- normalize(x_del$ApplicantIncome)
x_del$CoapplicantIncome_norm <- normalize(x_del$CoapplicantIncome)
x_del$LoanAmount_norm <- normalize(x_del$LoanAmount)
x_del$Loan_Amount_Term_norm <- normalize(x_del$Loan_Amount_Term)
x_del1.5.3 Penskalaan Robust
x_train <- read.csv("loan-train.csv")
x_del <- na.omit(x_train) # menghilangkan na
#' Scale a numeric vector by its interquartile range.
#'
#' Subtracts the first quartile and divides by (third quartile - first
#' quartile).
#' NOTE(review): robust scaling is commonly centred on the median; this
#' version centres on the first quartile. Confirm that is intended.
#'
#' @param x Numeric vector without NA (quantile() errors on NA by default).
#' @return Numeric vector the same length as `x`.
robust <- function(x) {
  quartiles <- quantile(x) # 0%, 25%, 50%, 75%, 100%
  q1 <- quartiles[2]
  q3 <- quartiles[4]
  (x - q1) / (q3 - q1)
}
x_del # show the cleaned data before the robust-scaled columns are added
# Add an interquartile-range scaled copy of each numeric column.
x_del$ApplicantIncome_robust <- robust(x_del$ApplicantIncome)
x_del$CoapplicantIncome_robust <- robust(x_del$CoapplicantIncome)
x_del$LoanAmount_robust <- robust(x_del$LoanAmount)
x_del$Loan_Amount_Term_robust <- robust(x_del$Loan_Amount_Term)
x_del1.6 Penanganan Data Pencilan
1.6.1 Metode Statistik
Distribusi Gaussian
x_train <- read.csv("loan-train.csv")
x_train <- na.omit(x_train) # menghilangkan na
#' Flag outliers by distance from the mean (Gaussian rule).
#'
#' @param x Numeric vector to screen.
#' @param n_sd Half-width of the accepted band, in standard deviations.
#'   Defaults to 1, the cut-off used previously.
#' @return Logical vector the same length as `x`: TRUE where the value lies
#'   below mean - n_sd * sd or above mean + n_sd * sd. Empty input gives
#'   logical(0).
pencilan <- function(x, n_sd = 1) {
  sample_mean <- mean(x)
  cut_off <- sd(x) * n_sd
  lower <- sample_mean - cut_off
  upper <- sample_mean + cut_off
  # Vectorised comparison: always a logical vector, whereas an element-wise
  # sapply() returns list() on empty input.
  x < lower | x > upper
}
x_train[pencilan(x_train$CoapplicantIncome),]
x_train[pencilan(x_train$ApplicantIncome),]
1.6.2 Boxplot atau Rentang Interkuartil (IQR)
x_train <- read.csv("loan-train.csv")
boxplot(x_train$CoapplicantIncome)
boxplot(x_train$ApplicantIncome)
1.7 Penanganan Data Kategorikal
dim(x_train)## [1] 614 13
head(x_train, 5)x_Category<-Filter(is.character, x_train)
colSums(is.na(x_Category))## Loan_ID Gender Married Dependents Education
## 0 0 0 0 0
## Self_Employed Property_Area Loan_Status
## 0 0 0
1.7.1 Pelabelan
x_train <- read.csv("loan-train.csv") #import data training X
library(superml)
x_label <- LabelEncoder$new()
x_train$Gender <- x_label$fit_transform(x_train$Gender)
x_train$Married <- x_label$fit_transform(x_train$Married)
x_train$Education <- x_label$fit_transform(x_train$Education)
x_train$Self_Employed <- x_label$fit_transform(x_train$Self_Employed)
x_train$Property_Area <- x_label$fit_transform(x_train$Property_Area)
x_train$Loan_Status <- x_label$fit_transform(x_train$Loan_Status)
x_train1.7.2 Pemetaan Kustom
# Custom mapping: recode Gender by hand on a fresh copy of the data.
x_train <- read.csv("loan-train.csv")
# Each assignment overwrites only the rows whose Gender equals the compared
# string: "Male" becomes 1 and "Female" becomes 2.
# NOTE(review): if Gender is read as a character column the codes are stored
# as strings ("1", "2", "3") - confirm whether a numeric column is wanted.
x_train$Gender[x_train$Gender=="Male"]<-1
x_train$Gender[x_train$Gender=="Female"]<-2
# Rows with an empty Gender string get their own code, 3.
x_train$Gender[x_train$Gender==""]<-3
x_train1.7.3 Variabel Dummy
library(fastDummies)
x_train <- read.csv("loan-train.csv")
x_train <- dummy_cols(x_train)
x_trainTugas 2
Lakukan Proses Visualisasi Data dengan menggunakan R dan Python dengan beberapa langkah berikut:
2.1 Visualisasi Univariabel
2.1.1 Kategori
a. Bar Chart
library(ggplot2)
df<- read.csv("loan-train.csv")
ggplot(df, aes(x = Property_Area)) +
geom_bar(fill = "#C04343", color= "azure4") +
theme_minimal() +
labs(x = "Property Area", y = "Frequency", title = "Property Area of Loan Train") b. Pie Chart
library(dplyr)
library(ggplot2)
library(scales)
plotdata <- df %>%
count(Property_Area) %>%
arrange(desc(Property_Area)) %>%
mutate(prop = round(n*100/sum(n), 1), lab.ypos = cumsum(prop) - 0.5*prop)
# Create Pie chart
mycols <- c("#0073C2FF", "#EFC000FF", "#868686FF", "#CD534CFF")
ggplot(plotdata, aes(x = "", y = prop, fill = Property_Area)) +
geom_bar(width = 1, stat = "identity", color = "white") +
coord_polar("y", start = 0) +
geom_text(aes(y = lab.ypos, label = prop), color = "white")+
scale_fill_manual(values = mycols) +
theme_void()+
labs(title = "Property Area of Loan Train")c. Tree Map
library(ggplot2)
library(treemapify)
library(scales)
plotdata <- df %>%
count(Property_Area)
ggplot(plotdata, aes(fill = Property_Area, area = n)) +
geom_treemap() +
labs(title = "Property Area of Loan Train")2.1.2 Numerik
a. Histogram
library(ggplot2)
ggplot(df, aes(x = ApplicantIncome)) +
geom_histogram(fill = "#C04343", color = "white", bins = 20) +
theme_minimal() +
labs(title="Applicant Income of Loan Train", x = "Applicant Income")
b. Kernel Density Plot
library(ggplot2)
ggplot(df, aes(x = ApplicantIncome)) +
geom_density(fill = "#C04343") +
theme_minimal() +
labs(title = "Applicant Income of Loan Train")c. Dot Plot
library(ggplot2)
ggplot(df, aes(x = ApplicantIncome)) +
geom_dotplot(fill = "#C04343", color = "azure4") +
theme_minimal() +
labs(title = "Applicant Income of Loan Train", y = "Proportion", x = "Applicant Income")2.2 Visualisasi Bivariabel
2.2.1 Kategori vs Kategori
Grouped Bar Chart
library(ggplot2)
ggplot(df, aes(x = Gender, fill = Married)) +
theme_minimal() +
geom_bar(position = position_dodge(preserve = "single"))2.2.2 Numerik vs Numerik
Scatterplot Fit Lines
library(ggplot2)
ggplot(df,
aes(x = ApplicantIncome,
y = CoapplicantIncome)) +
geom_point(color= "#C04343") +
geom_smooth(method = "lm", color = "brown1")+
theme_minimal() +
labs(x = "Applicant Income",
y = "Coapplicant Income",
title = "Applicant Income vs. Coapplicant Income")2.2.3 Kategori vs Numerik
Grouped Kernel Density Plots
library(ggplot2)
ggplot(df,
aes(x = ApplicantIncome,
fill = Gender)) +
geom_density(alpha = 0.4) +
theme_minimal() +
labs(title = "Applicant Income distribution by Gender")2.3 Visualisasi Multivariabel
2.3.1 Grouping
library(carData)
library(ggplot2)
# `df` already holds the data read from "loan-train.csv" earlier in this
# document. carData ships no data set named `df`, so the former
# data(df, package = "carData") call only raised a "data set not found"
# warning and has been removed.
ggplot(df, aes(x = ApplicantIncome,
y = Gender,
color=LoanAmount)) +
geom_point() +
theme_minimal() +
labs(title = "Applicant Income by Gender and Loan Amount")2.3.2 Faceting
library(carData)
library(ggplot2)
ggplot(df, aes(x = ApplicantIncome)) +
geom_histogram(fill = "#C04343",
color = "white") +
facet_wrap(~Gender, ncol = 1) +
theme_minimal() +
labs(title = "Applicant Income by Gender")Tugas 3
3.1 Kualitatif
3.1.1 Kategori Univariat
library(readr)
df= read_csv("loan-train.csv")
spec(df)## cols(
## Loan_ID = col_character(),
## Gender = col_character(),
## Married = col_character(),
## Dependents = col_character(),
## Education = col_character(),
## Self_Employed = col_character(),
## ApplicantIncome = col_double(),
## CoapplicantIncome = col_double(),
## LoanAmount = col_double(),
## Loan_Amount_Term = col_double(),
## Credit_History = col_double(),
## Property_Area = col_character(),
## Loan_Status = col_character()
## )
apply(is.na(df),2, which)## $Loan_ID
## integer(0)
##
## $Gender
## [1] 24 127 172 189 315 335 461 468 478 508 577 589 593
##
## $Married
## [1] 105 229 436
##
## $Dependents
## [1] 103 105 121 227 229 294 302 333 336 347 356 436 518 572 598
##
## $Education
## integer(0)
##
## $Self_Employed
## [1] 12 20 25 30 31 96 108 112 115 159 171 219 232 237 269 296 334 337 345
## [20] 375 381 386 412 433 448 464 469 536 543 580 601 602
##
## $ApplicantIncome
## integer(0)
##
## $CoapplicantIncome
## integer(0)
##
## $LoanAmount
## [1] 1 36 64 82 96 103 104 114 128 203 285 306 323 339 388 436 438 480 525
## [20] 551 552 606
##
## $Loan_Amount_Term
## [1] 20 37 45 46 74 113 166 198 224 233 336 368 422 424
##
## $Credit_History
## [1] 17 25 31 43 80 84 87 96 118 126 130 131 157 182 188 199 220 237 238
## [20] 260 261 280 310 314 318 319 324 349 364 378 393 396 412 445 450 452 461 474
## [39] 491 492 498 504 507 531 534 545 557 566 584 601
##
## $Property_Area
## integer(0)
##
## $Loan_Status
## integer(0)
# Drop every row that contains at least one NA, then preview the result.
df <- na.omit(df)
head(df, 3)
# Frequency table of the single categorical variable Gender.
Cat1 <- table(df$Gender)
Cat1##
## Female Male
## 86 394
prop.table(table(df$Gender)) ##
## Female Male
## 0.1791667 0.8208333
3.1.2 Kategori Bivariat
library(readr)
library(dplyr)
library(magrittr)
Cat2<- df %>%
select(Gender, Loan_ID) %>%
table()
Cat2## Loan_ID
## Gender LP001003 LP001005 LP001006 LP001008 LP001011 LP001013 LP001014
## Female 0 0 0 0 0 0 0
## Male 1 1 1 1 1 1 1
## Loan_ID
## Gender LP001018 LP001020 LP001024 LP001028 LP001029 LP001030 LP001032
## Female 0 0 0 0 0 0 0
## Male 1 1 1 1 1 1 1
## Loan_ID
## Gender LP001036 LP001038 LP001043 LP001046 LP001047 LP001066 LP001068
## Female 1 0 0 0 0 0 0
## Male 0 1 1 1 1 1 1
## Loan_ID
## Gender LP001073 LP001086 LP001095 LP001097 LP001098 LP001100 LP001112
## Female 0 0 0 0 0 0 1
## Male 1 1 1 1 1 1 0
## Loan_ID
## Gender LP001114 LP001116 LP001119 LP001120 LP001131 LP001138 LP001144
## Female 0 0 0 0 0 0 0
## Male 1 1 1 1 1 1 1
## Loan_ID
## Gender LP001146 LP001151 LP001155 LP001157 LP001164 LP001179 LP001186
## Female 1 1 1 1 1 0 1
## Male 0 0 0 0 0 1 0
## Loan_ID
## Gender LP001194 LP001195 LP001197 LP001198 LP001199 LP001205 LP001206
## Female 0 0 0 0 0 0 0
## Male 1 1 1 1 1 1 1
## Loan_ID
## Gender LP001207 LP001222 LP001225 LP001228 LP001233 LP001238 LP001241
## Female 0 1 0 0 0 0 1
## Male 1 0 1 1 1 1 0
## Loan_ID
## Gender LP001243 LP001245 LP001248 LP001253 LP001255 LP001256 LP001259
## Female 0 0 0 0 0 0 0
## Male 1 1 1 1 1 1 1
## Loan_ID
## Gender LP001263 LP001265 LP001267 LP001275 LP001279 LP001282 LP001289
## Female 0 1 1 0 0 0 0
## Male 1 0 0 1 1 1 1
## Loan_ID
## Gender LP001310 LP001316 LP001318 LP001319 LP001322 LP001325 LP001327
## Female 0 0 0 0 0 0 1
## Male 1 1 1 1 1 1 0
## Loan_ID
## Gender LP001333 LP001334 LP001343 LP001345 LP001349 LP001367 LP001369
## Female 0 0 0 0 0 0 0
## Male 1 1 1 1 1 1 1
## Loan_ID
## Gender LP001379 LP001384 LP001385 LP001401 LP001404 LP001421 LP001422
## Female 0 0 0 0 1 0 1
## Male 1 1 1 1 0 1 0
## Loan_ID
## Gender LP001430 LP001431 LP001432 LP001439 LP001451 LP001473 LP001478
## Female 1 1 0 0 0 0 0
## Male 0 0 1 1 1 1 1
## Loan_ID
## Gender LP001482 LP001487 LP001488 LP001489 LP001491 LP001492 LP001493
## Female 0 0 0 1 0 0 0
## Male 1 1 1 0 1 1 1
## Loan_ID
## Gender LP001497 LP001498 LP001504 LP001507 LP001508 LP001514 LP001516
## Female 0 0 0 0 0 1 1
## Male 1 1 1 1 1 0 0
## Loan_ID
## Gender LP001518 LP001519 LP001520 LP001528 LP001529 LP001531 LP001532
## Female 0 1 0 0 0 0 0
## Male 1 0 1 1 1 1 1
## Loan_ID
## Gender LP001535 LP001536 LP001543 LP001552 LP001560 LP001562 LP001565
## Female 0 0 0 0 0 0 0
## Male 1 1 1 1 1 1 1
## Loan_ID
## Gender LP001570 LP001572 LP001577 LP001578 LP001579 LP001580 LP001586
## Female 0 0 1 0 0 0 0
## Male 1 1 0 1 1 1 1
## Loan_ID
## Gender LP001594 LP001603 LP001606 LP001608 LP001610 LP001616 LP001630
## Female 0 0 0 0 0 0 0
## Male 1 1 1 1 1 1 1
## Loan_ID
## Gender LP001633 LP001636 LP001637 LP001639 LP001640 LP001641 LP001647
## Female 0 0 0 1 0 0 0
## Male 1 1 1 0 1 1 1
## Loan_ID
## Gender LP001653 LP001656 LP001657 LP001658 LP001664 LP001665 LP001666
## Female 0 0 0 0 0 0 0
## Male 1 1 1 1 1 1 1
## Loan_ID
## Gender LP001673 LP001674 LP001677 LP001688 LP001691 LP001692 LP001693
## Female 0 0 0 0 0 1 1
## Male 1 1 1 1 1 0 0
## Loan_ID
## Gender LP001698 LP001699 LP001702 LP001708 LP001711 LP001713 LP001715
## Female 0 0 0 1 0 0 0
## Male 1 1 1 0 1 1 1
## Loan_ID
## Gender LP001716 LP001720 LP001722 LP001726 LP001736 LP001743 LP001744
## Female 0 0 0 0 0 0 0
## Male 1 1 1 1 1 1 1
## Loan_ID
## Gender LP001750 LP001751 LP001758 LP001761 LP001765 LP001776 LP001778
## Female 0 0 0 0 0 1 0
## Male 1 1 1 1 1 0 1
## Loan_ID
## Gender LP001784 LP001790 LP001792 LP001798 LP001800 LP001806 LP001807
## Female 0 1 0 0 0 0 0
## Male 1 0 1 1 1 1 1
## Loan_ID
## Gender LP001811 LP001813 LP001814 LP001819 LP001824 LP001825 LP001835
## Female 0 0 0 0 0 0 0
## Male 1 1 1 1 1 1 1
## Loan_ID
## Gender LP001836 LP001841 LP001843 LP001844 LP001846 LP001849 LP001854
## Female 1 0 0 0 1 0 0
## Male 0 1 1 1 0 1 1
## Loan_ID
## Gender LP001859 LP001868 LP001870 LP001871 LP001872 LP001875 LP001877
## Female 0 0 1 1 0 0 0
## Male 1 1 0 0 1 1 1
## Loan_ID
## Gender LP001882 LP001884 LP001888 LP001891 LP001892 LP001894 LP001896
## Female 0 1 1 0 0 0 0
## Male 1 0 0 1 1 1 1
## Loan_ID
## Gender LP001900 LP001903 LP001904 LP001907 LP001910 LP001914 LP001915
## Female 0 0 0 0 0 0 0
## Male 1 1 1 1 1 1 1
## Loan_ID
## Gender LP001917 LP001924 LP001925 LP001926 LP001931 LP001935 LP001936
## Female 1 0 1 0 1 0 0
## Male 0 1 0 1 0 1 1
## Loan_ID
## Gender LP001938 LP001940 LP001947 LP001953 LP001954 LP001955 LP001963
## Female 0 0 0 0 1 1 0
## Male 1 1 1 1 0 0 1
## Loan_ID
## Gender LP001964 LP001974 LP001977 LP001978 LP001993 LP001994 LP001996
## Female 0 1 0 0 1 1 0
## Male 1 0 1 1 0 0 1
## Loan_ID
## Gender LP002002 LP002004 LP002006 LP002031 LP002035 LP002050 LP002051
## Female 1 0 1 0 0 0 0
## Male 0 1 0 1 1 1 1
## Loan_ID
## Gender LP002053 LP002065 LP002067 LP002068 LP002082 LP002086 LP002087
## Female 0 0 0 0 0 1 1
## Male 1 1 1 1 1 0 0
## Loan_ID
## Gender LP002097 LP002098 LP002112 LP002114 LP002115 LP002116 LP002119
## Female 0 0 0 1 0 1 0
## Male 1 1 1 0 1 0 1
## Loan_ID
## Gender LP002126 LP002129 LP002131 LP002138 LP002139 LP002140 LP002141
## Female 0 0 0 0 0 0 0
## Male 1 1 1 1 1 1 1
## Loan_ID
## Gender LP002142 LP002143 LP002149 LP002151 LP002158 LP002160 LP002161
## Female 1 1 0 0 0 0 1
## Male 0 0 1 1 1 1 0
## Loan_ID
## Gender LP002170 LP002175 LP002180 LP002181 LP002187 LP002190 LP002191
## Female 0 0 0 0 0 0 0
## Male 1 1 1 1 1 1 1
## Loan_ID
## Gender LP002194 LP002197 LP002201 LP002205 LP002211 LP002219 LP002224
## Female 1 0 0 0 0 0 0
## Male 0 1 1 1 1 1 1
## Loan_ID
## Gender LP002225 LP002229 LP002231 LP002234 LP002236 LP002239 LP002244
## Female 0 0 1 0 0 0 0
## Male 1 1 0 1 1 1 1
## Loan_ID
## Gender LP002250 LP002255 LP002262 LP002265 LP002266 LP002277 LP002281
## Female 0 0 0 0 0 1 0
## Male 1 1 1 1 1 0 1
## Loan_ID
## Gender LP002284 LP002287 LP002288 LP002296 LP002297 LP002300 LP002301
## Female 0 1 0 0 0 1 1
## Male 1 0 1 1 1 0 0
## Loan_ID
## Gender LP002305 LP002308 LP002314 LP002315 LP002317 LP002318 LP002328
## Female 1 0 1 0 0 1 0
## Male 0 1 0 1 1 0 1
## Loan_ID
## Gender LP002332 LP002335 LP002337 LP002341 LP002342 LP002345 LP002347
## Female 0 1 1 1 0 0 0
## Male 1 0 0 0 1 1 1
## Loan_ID
## Gender LP002348 LP002361 LP002364 LP002366 LP002367 LP002368 LP002369
## Female 0 0 0 0 1 0 0
## Male 1 1 1 1 0 1 1
## Loan_ID
## Gender LP002370 LP002377 LP002379 LP002387 LP002390 LP002398 LP002403
## Female 0 1 0 0 0 0 0
## Male 1 0 1 1 1 1 1
## Loan_ID
## Gender LP002407 LP002408 LP002409 LP002418 LP002422 LP002429 LP002434
## Female 1 0 0 0 0 0 0
## Male 0 1 1 1 1 1 1
## Loan_ID
## Gender LP002443 LP002446 LP002448 LP002449 LP002453 LP002455 LP002459
## Female 0 0 0 0 0 0 0
## Male 1 1 1 1 1 1 1
## Loan_ID
## Gender LP002467 LP002472 LP002473 LP002484 LP002487 LP002493 LP002494
## Female 0 0 0 0 0 0 0
## Male 1 1 1 1 1 1 1
## Loan_ID
## Gender LP002500 LP002505 LP002515 LP002517 LP002519 LP002524 LP002527
## Female 0 0 0 0 0 0 0
## Male 1 1 1 1 1 1 1
## Loan_ID
## Gender LP002529 LP002531 LP002534 LP002536 LP002537 LP002541 LP002543
## Female 0 0 1 0 0 0 0
## Male 1 1 0 1 1 1 1
## Loan_ID
## Gender LP002544 LP002545 LP002547 LP002555 LP002556 LP002571 LP002582
## Female 0 0 0 0 0 0 1
## Male 1 1 1 1 1 1 0
## Loan_ID
## Gender LP002585 LP002586 LP002587 LP002600 LP002602 LP002603 LP002606
## Female 0 1 0 0 0 1 1
## Male 1 0 1 1 1 0 0
## Loan_ID
## Gender LP002615 LP002619 LP002622 LP002626 LP002634 LP002637 LP002640
## Female 0 0 0 0 1 0 0
## Male 1 1 1 1 0 1 1
## Loan_ID
## Gender LP002643 LP002648 LP002652 LP002659 LP002670 LP002683 LP002684
## Female 0 0 0 0 1 0 1
## Male 1 1 1 1 0 1 0
## Loan_ID
## Gender LP002689 LP002690 LP002692 LP002693 LP002699 LP002705 LP002706
## Female 0 0 0 0 0 0 0
## Male 1 1 1 1 1 1 1
## Loan_ID
## Gender LP002714 LP002716 LP002720 LP002723 LP002731 LP002734 LP002738
## Female 0 0 0 0 1 0 0
## Male 1 1 1 1 0 1 1
## Loan_ID
## Gender LP002739 LP002740 LP002741 LP002743 LP002755 LP002767 LP002768
## Female 0 0 1 1 0 0 0
## Male 1 1 0 0 1 1 1
## Loan_ID
## Gender LP002772 LP002776 LP002777 LP002785 LP002788 LP002789 LP002792
## Female 0 1 0 0 0 0 0
## Male 1 0 1 1 1 1 1
## Loan_ID
## Gender LP002795 LP002798 LP002804 LP002807 LP002813 LP002820 LP002821
## Female 0 0 1 0 1 0 0
## Male 1 1 0 1 0 1 1
## Loan_ID
## Gender LP002832 LP002836 LP002837 LP002840 LP002841 LP002842 LP002855
## Female 0 0 0 1 0 0 0
## Male 1 1 1 0 1 1 1
## Loan_ID
## Gender LP002862 LP002863 LP002868 LP002874 LP002877 LP002892 LP002893
## Female 0 0 0 0 0 0 0
## Male 1 1 1 1 1 1 1
## Loan_ID
## Gender LP002894 LP002911 LP002912 LP002916 LP002917 LP002926 LP002928
## Female 1 0 0 0 1 0 0
## Male 0 1 1 1 0 1 1
## Loan_ID
## Gender LP002931 LP002936 LP002938 LP002940 LP002941 LP002945 LP002948
## Female 0 0 0 0 0 0 0
## Male 1 1 1 1 1 1 1
## Loan_ID
## Gender LP002953 LP002958 LP002959 LP002961 LP002964 LP002974 LP002978
## Female 0 0 1 0 0 0 1
## Male 1 1 0 1 1 1 0
## Loan_ID
## Gender LP002979 LP002983 LP002984 LP002990
## Female 0 0 0 1
## Male 1 1 1 0
3.1.3 Kategori Multivariat
Cat3 <- df %>%
select(Gender, Loan_ID, Dependents) %>%
ftable()
Cat3## Dependents 0 1 2 3+
## Gender Loan_ID
## Female LP001003 0 0 0 0
## LP001005 0 0 0 0
## LP001006 0 0 0 0
## LP001008 0 0 0 0
## LP001011 0 0 0 0
## LP001013 0 0 0 0
## LP001014 0 0 0 0
## LP001018 0 0 0 0
## LP001020 0 0 0 0
## LP001024 0 0 0 0
## LP001028 0 0 0 0
## LP001029 0 0 0 0
## LP001030 0 0 0 0
## LP001032 0 0 0 0
## LP001036 1 0 0 0
## LP001038 0 0 0 0
## LP001043 0 0 0 0
## LP001046 0 0 0 0
## LP001047 0 0 0 0
## LP001066 0 0 0 0
## LP001068 0 0 0 0
## LP001073 0 0 0 0
## LP001086 0 0 0 0
## LP001095 0 0 0 0
## LP001097 0 0 0 0
## LP001098 0 0 0 0
## LP001100 0 0 0 0
## LP001112 1 0 0 0
## LP001114 0 0 0 0
## LP001116 0 0 0 0
## LP001119 0 0 0 0
## LP001120 0 0 0 0
## LP001131 0 0 0 0
## LP001138 0 0 0 0
## LP001144 0 0 0 0
## LP001146 1 0 0 0
## LP001151 1 0 0 0
## LP001155 1 0 0 0
## LP001157 1 0 0 0
## LP001164 1 0 0 0
## LP001179 0 0 0 0
## LP001186 0 1 0 0
## LP001194 0 0 0 0
## LP001195 0 0 0 0
## LP001197 0 0 0 0
## LP001198 0 0 0 0
## LP001199 0 0 0 0
## LP001205 0 0 0 0
## LP001206 0 0 0 0
## LP001207 0 0 0 0
## LP001222 1 0 0 0
## LP001225 0 0 0 0
## LP001228 0 0 0 0
## LP001233 0 0 0 0
## LP001238 0 0 0 0
## LP001241 1 0 0 0
## LP001243 0 0 0 0
## LP001245 0 0 0 0
## LP001248 0 0 0 0
## LP001253 0 0 0 0
## LP001255 0 0 0 0
## LP001256 0 0 0 0
## LP001259 0 0 0 0
## LP001263 0 0 0 0
## LP001265 1 0 0 0
## LP001267 0 0 1 0
## LP001275 0 0 0 0
## LP001279 0 0 0 0
## LP001282 0 0 0 0
## LP001289 0 0 0 0
## LP001310 0 0 0 0
## LP001316 0 0 0 0
## LP001318 0 0 0 0
## LP001319 0 0 0 0
## LP001322 0 0 0 0
## LP001325 0 0 0 0
## LP001327 1 0 0 0
## LP001333 0 0 0 0
## LP001334 0 0 0 0
## LP001343 0 0 0 0
## LP001345 0 0 0 0
## LP001349 0 0 0 0
## LP001367 0 0 0 0
## LP001369 0 0 0 0
## LP001379 0 0 0 0
## LP001384 0 0 0 0
## LP001385 0 0 0 0
## LP001401 0 0 0 0
## LP001404 1 0 0 0
## LP001421 0 0 0 0
## LP001422 1 0 0 0
## LP001430 1 0 0 0
## LP001431 1 0 0 0
## LP001432 0 0 0 0
## LP001439 0 0 0 0
## LP001451 0 0 0 0
## LP001473 0 0 0 0
## LP001478 0 0 0 0
## LP001482 0 0 0 0
## LP001487 0 0 0 0
## LP001488 0 0 0 0
## LP001489 1 0 0 0
## LP001491 0 0 0 0
## LP001492 0 0 0 0
## LP001493 0 0 0 0
## LP001497 0 0 0 0
## LP001498 0 0 0 0
## LP001504 0 0 0 0
## LP001507 0 0 0 0
## LP001508 0 0 0 0
## LP001514 1 0 0 0
## LP001516 0 0 1 0
## LP001518 0 0 0 0
## LP001519 1 0 0 0
## LP001520 0 0 0 0
## LP001528 0 0 0 0
## LP001529 0 0 0 0
## LP001531 0 0 0 0
## LP001532 0 0 0 0
## LP001535 0 0 0 0
## LP001536 0 0 0 0
## LP001543 0 0 0 0
## LP001552 0 0 0 0
## LP001560 0 0 0 0
## LP001562 0 0 0 0
## LP001565 0 0 0 0
## LP001570 0 0 0 0
## LP001572 0 0 0 0
## LP001577 1 0 0 0
## LP001578 0 0 0 0
## LP001579 0 0 0 0
## LP001580 0 0 0 0
## LP001586 0 0 0 0
## LP001594 0 0 0 0
## LP001603 0 0 0 0
## LP001606 0 0 0 0
## LP001608 0 0 0 0
## LP001610 0 0 0 0
## LP001616 0 0 0 0
## LP001630 0 0 0 0
## LP001633 0 0 0 0
## LP001636 0 0 0 0
## LP001637 0 0 0 0
## LP001639 1 0 0 0
## LP001640 0 0 0 0
## LP001641 0 0 0 0
## LP001647 0 0 0 0
## LP001653 0 0 0 0
## LP001656 0 0 0 0
## LP001657 0 0 0 0
## LP001658 0 0 0 0
## LP001664 0 0 0 0
## LP001665 0 0 0 0
## LP001666 0 0 0 0
## LP001673 0 0 0 0
## LP001674 0 0 0 0
## LP001677 0 0 0 0
## LP001688 0 0 0 0
## LP001691 0 0 0 0
## LP001692 1 0 0 0
## LP001693 1 0 0 0
## LP001698 0 0 0 0
## LP001699 0 0 0 0
## LP001702 0 0 0 0
## LP001708 1 0 0 0
## LP001711 0 0 0 0
## LP001713 0 0 0 0
## LP001715 0 0 0 0
## LP001716 0 0 0 0
## LP001720 0 0 0 0
## LP001722 0 0 0 0
## LP001726 0 0 0 0
## LP001736 0 0 0 0
## LP001743 0 0 0 0
## LP001744 0 0 0 0
## LP001750 0 0 0 0
## LP001751 0 0 0 0
## LP001758 0 0 0 0
## LP001761 0 0 0 0
## LP001765 0 0 0 0
## LP001776 1 0 0 0
## LP001778 0 0 0 0
## LP001784 0 0 0 0
## LP001790 0 1 0 0
## LP001792 0 0 0 0
## LP001798 0 0 0 0
## LP001800 0 0 0 0
## LP001806 0 0 0 0
## LP001807 0 0 0 0
## LP001811 0 0 0 0
## LP001813 0 0 0 0
## LP001814 0 0 0 0
## LP001819 0 0 0 0
## LP001824 0 0 0 0
## LP001825 0 0 0 0
## LP001835 0 0 0 0
## LP001836 0 0 1 0
## LP001841 0 0 0 0
## LP001843 0 0 0 0
## LP001844 0 0 0 0
## LP001846 0 0 0 1
## LP001849 0 0 0 0
## LP001854 0 0 0 0
## LP001859 0 0 0 0
## LP001868 0 0 0 0
## LP001870 0 1 0 0
## LP001871 1 0 0 0
## LP001872 0 0 0 0
## LP001875 0 0 0 0
## LP001877 0 0 0 0
## LP001882 0 0 0 0
## LP001884 0 1 0 0
## LP001888 1 0 0 0
## LP001891 0 0 0 0
## LP001892 0 0 0 0
## LP001894 0 0 0 0
## LP001896 0 0 0 0
## LP001900 0 0 0 0
## LP001903 0 0 0 0
## LP001904 0 0 0 0
## LP001907 0 0 0 0
## LP001910 0 0 0 0
## LP001914 0 0 0 0
## LP001915 0 0 0 0
## LP001917 1 0 0 0
## LP001924 0 0 0 0
## LP001925 1 0 0 0
## LP001926 0 0 0 0
## LP001931 1 0 0 0
## LP001935 0 0 0 0
## LP001936 0 0 0 0
## LP001938 0 0 0 0
## LP001940 0 0 0 0
## LP001947 0 0 0 0
## LP001953 0 0 0 0
## LP001954 0 1 0 0
## LP001955 1 0 0 0
## LP001963 0 0 0 0
## LP001964 0 0 0 0
## LP001974 1 0 0 0
## LP001977 0 0 0 0
## LP001978 0 0 0 0
## LP001993 1 0 0 0
## LP001994 1 0 0 0
## LP001996 0 0 0 0
## LP002002 1 0 0 0
## LP002004 0 0 0 0
## LP002006 1 0 0 0
## LP002031 0 0 0 0
## LP002035 0 0 0 0
## LP002050 0 0 0 0
## LP002051 0 0 0 0
## LP002053 0 0 0 0
## LP002065 0 0 0 0
## LP002067 0 0 0 0
## LP002068 0 0 0 0
## LP002082 0 0 0 0
## LP002086 1 0 0 0
## LP002087 1 0 0 0
## LP002097 0 0 0 0
## LP002098 0 0 0 0
## LP002112 0 0 0 0
## LP002114 1 0 0 0
## LP002115 0 0 0 0
## LP002116 1 0 0 0
## LP002119 0 0 0 0
## LP002126 0 0 0 0
## LP002129 0 0 0 0
## LP002131 0 0 0 0
## LP002138 0 0 0 0
## LP002139 0 0 0 0
## LP002140 0 0 0 0
## LP002141 0 0 0 0
## LP002142 1 0 0 0
## LP002143 1 0 0 0
## LP002149 0 0 0 0
## LP002151 0 0 0 0
## LP002158 0 0 0 0
## LP002160 0 0 0 0
## LP002161 0 1 0 0
## LP002170 0 0 0 0
## LP002175 0 0 0 0
## LP002180 0 0 0 0
## LP002181 0 0 0 0
## LP002187 0 0 0 0
## LP002190 0 0 0 0
## LP002191 0 0 0 0
## LP002194 1 0 0 0
## LP002197 0 0 0 0
## LP002201 0 0 0 0
## LP002205 0 0 0 0
## LP002211 0 0 0 0
## LP002219 0 0 0 0
## LP002224 0 0 0 0
## LP002225 0 0 0 0
## LP002229 0 0 0 0
## LP002231 1 0 0 0
## LP002234 0 0 0 0
## LP002236 0 0 0 0
## LP002239 0 0 0 0
## LP002244 0 0 0 0
## LP002250 0 0 0 0
## LP002255 0 0 0 0
## LP002262 0 0 0 0
## LP002265 0 0 0 0
## LP002266 0 0 0 0
## LP002277 1 0 0 0
## LP002281 0 0 0 0
## LP002284 0 0 0 0
## LP002287 1 0 0 0
## LP002288 0 0 0 0
## LP002296 0 0 0 0
## LP002297 0 0 0 0
## LP002300 1 0 0 0
## LP002301 1 0 0 0
## LP002305 1 0 0 0
## LP002308 0 0 0 0
## LP002314 1 0 0 0
## LP002315 0 0 0 0
## LP002317 0 0 0 0
## LP002318 0 1 0 0
## LP002328 0 0 0 0
## LP002332 0 0 0 0
## LP002335 1 0 0 0
## LP002337 1 0 0 0
## LP002341 0 1 0 0
## LP002342 0 0 0 0
## LP002345 0 0 0 0
## LP002347 0 0 0 0
## LP002348 0 0 0 0
## LP002361 0 0 0 0
## LP002364 0 0 0 0
## LP002366 0 0 0 0
## LP002367 0 1 0 0
## LP002368 0 0 0 0
## LP002369 0 0 0 0
## LP002370 0 0 0 0
## LP002377 0 1 0 0
## LP002379 0 0 0 0
## LP002387 0 0 0 0
## LP002390 0 0 0 0
## LP002398 0 0 0 0
## LP002403 0 0 0 0
## LP002407 1 0 0 0
## LP002408 0 0 0 0
## LP002409 0 0 0 0
## LP002418 0 0 0 0
## LP002422 0 0 0 0
## LP002429 0 0 0 0
## LP002434 0 0 0 0
## LP002443 0 0 0 0
## LP002446 0 0 0 0
## LP002448 0 0 0 0
## LP002449 0 0 0 0
## LP002453 0 0 0 0
## LP002455 0 0 0 0
## LP002459 0 0 0 0
## LP002467 0 0 0 0
## LP002472 0 0 0 0
## LP002473 0 0 0 0
## LP002484 0 0 0 0
## LP002487 0 0 0 0
## LP002493 0 0 0 0
## LP002494 0 0 0 0
## LP002500 0 0 0 0
## LP002505 0 0 0 0
## LP002515 0 0 0 0
## LP002517 0 0 0 0
## LP002519 0 0 0 0
## LP002524 0 0 0 0
## LP002527 0 0 0 0
## LP002529 0 0 0 0
## LP002531 0 0 0 0
## LP002534 1 0 0 0
## LP002536 0 0 0 0
## LP002537 0 0 0 0
## LP002541 0 0 0 0
## LP002543 0 0 0 0
## LP002544 0 0 0 0
## LP002545 0 0 0 0
## LP002547 0 0 0 0
## LP002555 0 0 0 0
## LP002556 0 0 0 0
## LP002571 0 0 0 0
## LP002582 1 0 0 0
## LP002585 0 0 0 0
## LP002586 0 1 0 0
## LP002587 0 0 0 0
## LP002600 0 0 0 0
## LP002602 0 0 0 0
## LP002603 1 0 0 0
## LP002606 1 0 0 0
## LP002615 0 0 0 0
## LP002619 0 0 0 0
## LP002622 0 0 0 0
## LP002626 0 0 0 0
## LP002634 0 1 0 0
## LP002637 0 0 0 0
## LP002640 0 0 0 0
## LP002643 0 0 0 0
## LP002648 0 0 0 0
## LP002652 0 0 0 0
## LP002659 0 0 0 0
## LP002670 0 0 1 0
## LP002683 0 0 0 0
## LP002684 1 0 0 0
## LP002689 0 0 0 0
## LP002690 0 0 0 0
## LP002692 0 0 0 0
## LP002693 0 0 0 0
## LP002699 0 0 0 0
## LP002705 0 0 0 0
## LP002706 0 0 0 0
## LP002714 0 0 0 0
## LP002716 0 0 0 0
## LP002720 0 0 0 0
## LP002723 0 0 0 0
## LP002731 1 0 0 0
## LP002734 0 0 0 0
## LP002738 0 0 0 0
## LP002739 0 0 0 0
## LP002740 0 0 0 0
## LP002741 0 1 0 0
## LP002743 1 0 0 0
## LP002755 0 0 0 0
## LP002767 0 0 0 0
## LP002768 0 0 0 0
## LP002772 0 0 0 0
## LP002776 1 0 0 0
## LP002777 0 0 0 0
## LP002785 0 0 0 0
## LP002788 0 0 0 0
## LP002789 0 0 0 0
## LP002792 0 0 0 0
## LP002795 0 0 0 0
## LP002798 0 0 0 0
## LP002804 1 0 0 0
## LP002807 0 0 0 0
## LP002813 0 1 0 0
## LP002820 0 0 0 0
## LP002821 0 0 0 0
## LP002832 0 0 0 0
## LP002836 0 0 0 0
## LP002837 0 0 0 0
## LP002840 1 0 0 0
## LP002841 0 0 0 0
## LP002842 0 0 0 0
## LP002855 0 0 0 0
## LP002862 0 0 0 0
## LP002863 0 0 0 0
## LP002868 0 0 0 0
## LP002874 0 0 0 0
## LP002877 0 0 0 0
## LP002892 0 0 0 0
## LP002893 0 0 0 0
## LP002894 1 0 0 0
## LP002911 0 0 0 0
## LP002912 0 0 0 0
## LP002916 0 0 0 0
## LP002917 1 0 0 0
## LP002926 0 0 0 0
## LP002928 0 0 0 0
## LP002931 0 0 0 0
## LP002936 0 0 0 0
## LP002938 0 0 0 0
## LP002940 0 0 0 0
## LP002941 0 0 0 0
## LP002945 0 0 0 0
## LP002948 0 0 0 0
## LP002953 0 0 0 0
## LP002958 0 0 0 0
## LP002959 0 1 0 0
## LP002961 0 0 0 0
## LP002964 0 0 0 0
## LP002974 0 0 0 0
## LP002978 1 0 0 0
## LP002979 0 0 0 0
## LP002983 0 0 0 0
## LP002984 0 0 0 0
## LP002990 1 0 0 0
## Male LP001003 0 1 0 0
## LP001005 1 0 0 0
## LP001006 1 0 0 0
## LP001008 1 0 0 0
## LP001011 0 0 1 0
## LP001013 1 0 0 0
## LP001014 0 0 0 1
## LP001018 0 0 1 0
## LP001020 0 1 0 0
## LP001024 0 0 1 0
## LP001028 0 0 1 0
## LP001029 1 0 0 0
## LP001030 0 0 1 0
## LP001032 1 0 0 0
## LP001036 0 0 0 0
## LP001038 1 0 0 0
## LP001043 1 0 0 0
## LP001046 0 1 0 0
## LP001047 1 0 0 0
## LP001066 1 0 0 0
## LP001068 1 0 0 0
## LP001073 0 0 1 0
## LP001086 1 0 0 0
## LP001095 1 0 0 0
## LP001097 0 1 0 0
## LP001098 1 0 0 0
## LP001100 0 0 0 1
## LP001112 0 0 0 0
## LP001114 1 0 0 0
## LP001116 1 0 0 0
## LP001119 1 0 0 0
## LP001120 1 0 0 0
## LP001131 1 0 0 0
## LP001138 0 1 0 0
## LP001144 1 0 0 0
## LP001146 0 0 0 0
## LP001151 0 0 0 0
## LP001155 0 0 0 0
## LP001157 0 0 0 0
## LP001164 0 0 0 0
## LP001179 0 0 1 0
## LP001186 0 0 0 0
## LP001194 0 0 1 0
## LP001195 1 0 0 0
## LP001197 1 0 0 0
## LP001198 0 1 0 0
## LP001199 0 0 1 0
## LP001205 1 0 0 0
## LP001206 0 0 0 1
## LP001207 1 0 0 0
## LP001222 0 0 0 0
## LP001225 1 0 0 0
## LP001228 1 0 0 0
## LP001233 0 1 0 0
## LP001238 0 0 0 1
## LP001241 0 0 0 0
## LP001243 1 0 0 0
## LP001245 0 0 1 0
## LP001248 1 0 0 0
## LP001253 0 0 0 1
## LP001255 1 0 0 0
## LP001256 1 0 0 0
## LP001259 0 1 0 0
## LP001263 0 0 0 1
## LP001265 0 0 0 0
## LP001267 0 0 0 0
## LP001275 0 1 0 0
## LP001279 1 0 0 0
## LP001282 1 0 0 0
## LP001289 1 0 0 0
## LP001310 1 0 0 0
## LP001316 1 0 0 0
## LP001318 0 0 1 0
## LP001319 0 0 1 0
## LP001322 1 0 0 0
## LP001325 1 0 0 0
## LP001327 0 0 0 0
## LP001333 1 0 0 0
## LP001334 1 0 0 0
## LP001343 1 0 0 0
## LP001345 0 0 1 0
## LP001349 1 0 0 0
## LP001367 0 1 0 0
## LP001369 0 0 1 0
## LP001379 0 0 1 0
## LP001384 0 0 0 1
## LP001385 1 0 0 0
## LP001401 0 1 0 0
## LP001404 0 0 0 0
## LP001421 1 0 0 0
## LP001422 0 0 0 0
## LP001430 0 0 0 0
## LP001431 0 0 0 0
## LP001432 0 0 1 0
## LP001439 1 0 0 0
## LP001451 0 1 0 0
## LP001473 1 0 0 0
## LP001478 1 0 0 0
## LP001482 1 0 0 0
## LP001487 1 0 0 0
## LP001488 0 0 0 1
## LP001489 0 0 0 0
## LP001491 0 0 1 0
## LP001492 1 0 0 0
## LP001493 0 0 1 0
## LP001497 0 0 1 0
## LP001498 1 0 0 0
## LP001504 1 0 0 0
## LP001507 1 0 0 0
## LP001508 0 0 1 0
## LP001514 0 0 0 0
## LP001516 0 0 0 0
## LP001518 0 1 0 0
## LP001519 0 0 0 0
## LP001520 1 0 0 0
## LP001528 1 0 0 0
## LP001529 1 0 0 0
## LP001531 1 0 0 0
## LP001532 0 0 1 0
## LP001535 1 0 0 0
## LP001536 0 0 0 1
## LP001543 0 1 0 0
## LP001552 1 0 0 0
## LP001560 1 0 0 0
## LP001562 1 0 0 0
## LP001565 0 1 0 0
## LP001570 0 0 1 0
## LP001572 1 0 0 0
## LP001577 0 0 0 0
## LP001578 1 0 0 0
## LP001579 1 0 0 0
## LP001580 0 0 1 0
## LP001586 0 0 0 1
## LP001594 1 0 0 0
## LP001603 1 0 0 0
## LP001606 1 0 0 0
## LP001608 0 0 1 0
## LP001610 0 0 0 1
## LP001616 0 1 0 0
## LP001630 1 0 0 0
## LP001633 0 1 0 0
## LP001636 1 0 0 0
## LP001637 0 1 0 0
## LP001639 0 0 0 0
## LP001640 1 0 0 0
## LP001641 0 1 0 0
## LP001647 1 0 0 0
## LP001653 1 0 0 0
## LP001656 1 0 0 0
## LP001657 1 0 0 0
## LP001658 1 0 0 0
## LP001664 1 0 0 0
## LP001665 0 1 0 0
## LP001666 1 0 0 0
## LP001673 1 0 0 0
## LP001674 0 1 0 0
## LP001677 0 0 1 0
## LP001688 0 1 0 0
## LP001691 0 0 1 0
## LP001692 0 0 0 0
## LP001693 0 0 0 0
## LP001698 1 0 0 0
## LP001699 1 0 0 0
## LP001702 1 0 0 0
## LP001708 0 0 0 0
## LP001711 0 0 0 1
## LP001713 0 1 0 0
## LP001715 0 0 0 1
## LP001716 1 0 0 0
## LP001720 0 0 0 1
## LP001722 1 0 0 0
## LP001726 1 0 0 0
## LP001736 1 0 0 0
## LP001743 0 0 1 0
## LP001744 1 0 0 0
## LP001750 1 0 0 0
## LP001751 1 0 0 0
## LP001758 0 0 1 0
## LP001761 1 0 0 0
## LP001765 0 1 0 0
## LP001776 0 0 0 0
## LP001778 0 1 0 0
## LP001784 0 1 0 0
## LP001790 0 0 0 0
## LP001792 0 1 0 0
## LP001798 0 0 1 0
## LP001800 0 1 0 0
## LP001806 1 0 0 0
## LP001807 0 0 1 0
## LP001811 1 0 0 0
## LP001813 1 0 0 0
## LP001814 0 0 1 0
## LP001819 0 1 0 0
## LP001824 0 1 0 0
## LP001825 1 0 0 0
## LP001835 1 0 0 0
## LP001836 0 0 0 0
## LP001841 1 0 0 0
## LP001843 0 1 0 0
## LP001844 1 0 0 0
## LP001846 0 0 0 0
## LP001849 1 0 0 0
## LP001854 0 0 0 1
## LP001859 1 0 0 0
## LP001868 1 0 0 0
## LP001870 0 0 0 0
## LP001871 0 0 0 0
## LP001872 1 0 0 0
## LP001875 1 0 0 0
## LP001877 0 0 1 0
## LP001882 0 0 0 1
## LP001884 0 0 0 0
## LP001888 0 0 0 0
## LP001891 1 0 0 0
## LP001892 1 0 0 0
## LP001894 1 0 0 0
## LP001896 0 0 1 0
## LP001900 0 1 0 0
## LP001903 1 0 0 0
## LP001904 1 0 0 0
## LP001907 1 0 0 0
## LP001910 0 1 0 0
## LP001914 1 0 0 0
## LP001915 0 0 1 0
## LP001917 0 0 0 0
## LP001924 1 0 0 0
## LP001925 0 0 0 0
## LP001926 1 0 0 0
## LP001931 0 0 0 0
## LP001935 1 0 0 0
## LP001936 1 0 0 0
## LP001938 0 0 1 0
## LP001940 0 0 1 0
## LP001947 1 0 0 0
## LP001953 0 1 0 0
## LP001954 0 0 0 0
## LP001955 0 0 0 0
## LP001963 0 1 0 0
## LP001964 1 0 0 0
## LP001974 0 0 0 0
## LP001977 0 1 0 0
## LP001978 1 0 0 0
## LP001993 0 0 0 0
## LP001994 0 0 0 0
## LP001996 1 0 0 0
## LP002002 0 0 0 0
## LP002004 1 0 0 0
## LP002006 0 0 0 0
## LP002031 0 1 0 0
## LP002035 0 0 1 0
## LP002050 0 1 0 0
## LP002051 1 0 0 0
## LP002053 0 0 0 1
## LP002065 0 0 0 1
## LP002067 0 1 0 0
## LP002068 1 0 0 0
## LP002082 1 0 0 0
## LP002086 0 0 0 0
## LP002087 0 0 0 0
## LP002097 0 1 0 0
## LP002098 1 0 0 0
## LP002112 0 0 1 0
## LP002114 0 0 0 0
## LP002115 0 0 0 1
## LP002116 0 0 0 0
## LP002119 0 1 0 0
## LP002126 0 0 0 1
## LP002129 1 0 0 0
## LP002131 0 0 1 0
## LP002138 1 0 0 0
## LP002139 1 0 0 0
## LP002140 1 0 0 0
## LP002141 0 0 0 1
## LP002142 0 0 0 0
## LP002143 0 0 0 0
## LP002149 0 0 1 0
## LP002151 0 1 0 0
## LP002158 1 0 0 0
## LP002160 0 0 0 1
## LP002161 0 0 0 0
## LP002170 0 0 1 0
## LP002175 1 0 0 0
## LP002180 1 0 0 0
## LP002181 1 0 0 0
## LP002187 1 0 0 0
## LP002190 0 1 0 0
## LP002191 1 0 0 0
## LP002194 0 0 0 0
## LP002197 0 0 1 0
## LP002201 0 0 1 0
## LP002205 0 1 0 0
## LP002211 1 0 0 0
## LP002219 0 0 0 1
## LP002224 1 0 0 0
## LP002225 0 0 1 0
## LP002229 1 0 0 0
## LP002231 0 0 0 0
## LP002234 1 0 0 0
## LP002236 0 0 1 0
## LP002239 1 0 0 0
## LP002244 1 0 0 0
## LP002250 1 0 0 0
## LP002255 0 0 0 1
## LP002262 0 0 0 1
## LP002265 0 0 1 0
## LP002266 0 0 1 0
## LP002277 0 0 0 0
## LP002281 1 0 0 0
## LP002284 1 0 0 0
## LP002287 0 0 0 0
## LP002288 0 0 1 0
## LP002296 1 0 0 0
## LP002297 1 0 0 0
## LP002300 0 0 0 0
## LP002301 0 0 0 0
## LP002305 0 0 0 0
## LP002308 1 0 0 0
## LP002314 0 0 0 0
## LP002315 0 1 0 0
## LP002317 0 0 0 1
## LP002318 0 0 0 0
## LP002328 1 0 0 0
## LP002332 1 0 0 0
## LP002335 0 0 0 0
## LP002337 0 0 0 0
## LP002341 0 0 0 0
## LP002342 0 0 1 0
## LP002345 1 0 0 0
## LP002347 1 0 0 0
## LP002348 1 0 0 0
## LP002361 1 0 0 0
## LP002364 1 0 0 0
## LP002366 1 0 0 0
## LP002367 0 0 0 0
## LP002368 0 0 1 0
## LP002369 1 0 0 0
## LP002370 1 0 0 0
## LP002377 0 0 0 0
## LP002379 1 0 0 0
## LP002387 1 0 0 0
## LP002390 1 0 0 0
## LP002398 1 0 0 0
## LP002403 1 0 0 0
## LP002407 0 0 0 0
## LP002408 1 0 0 0
## LP002409 1 0 0 0
## LP002418 0 0 0 1
## LP002422 0 1 0 0
## LP002429 0 1 0 0
## LP002434 0 0 1 0
## LP002443 0 0 1 0
## LP002446 0 0 1 0
## LP002448 1 0 0 0
## LP002449 1 0 0 0
## LP002453 1 0 0 0
## LP002455 0 0 1 0
## LP002459 1 0 0 0
## LP002467 1 0 0 0
## LP002472 0 0 1 0
## LP002473 1 0 0 0
## LP002484 0 0 0 1
## LP002487 1 0 0 0
## LP002493 1 0 0 0
## LP002494 1 0 0 0
## LP002500 0 0 0 1
## LP002505 1 0 0 0
## LP002515 0 1 0 0
## LP002517 0 1 0 0
## LP002519 0 0 0 1
## LP002524 0 0 1 0
## LP002527 0 0 1 0
## LP002529 0 0 1 0
## LP002531 0 1 0 0
## LP002534 0 0 0 0
## LP002536 0 0 0 1
## LP002537 1 0 0 0
## LP002541 1 0 0 0
## LP002543 0 0 1 0
## LP002544 0 1 0 0
## LP002545 0 0 1 0
## LP002547 0 1 0 0
## LP002555 0 0 1 0
## LP002556 1 0 0 0
## LP002571 1 0 0 0
## LP002582 0 0 0 0
## LP002585 1 0 0 0
## LP002586 0 0 0 0
## LP002587 1 0 0 0
## LP002600 0 1 0 0
## LP002602 1 0 0 0
## LP002603 0 0 0 0
## LP002606 0 0 0 0
## LP002615 0 0 1 0
## LP002619 1 0 0 0
## LP002622 0 0 1 0
## LP002626 1 0 0 0
## LP002634 0 0 0 0
## LP002637 1 0 0 0
## LP002640 0 1 0 0
## LP002643 0 0 1 0
## LP002648 1 0 0 0
## LP002652 1 0 0 0
## LP002659 0 0 0 1
## LP002670 0 0 0 0
## LP002683 1 0 0 0
## LP002684 0 0 0 0
## LP002689 0 0 1 0
## LP002690 1 0 0 0
## LP002692 0 0 0 1
## LP002693 0 0 1 0
## LP002699 0 0 1 0
## LP002705 1 0 0 0
## LP002706 0 1 0 0
## LP002714 0 1 0 0
## LP002716 1 0 0 0
## LP002720 0 0 0 1
## LP002723 0 0 1 0
## LP002731 0 0 0 0
## LP002734 1 0 0 0
## LP002738 0 0 1 0
## LP002739 1 0 0 0
## LP002740 0 0 0 1
## LP002741 0 0 0 0
## LP002743 0 0 0 0
## LP002755 0 1 0 0
## LP002767 1 0 0 0
## LP002768 1 0 0 0
## LP002772 1 0 0 0
## LP002776 0 0 0 0
## LP002777 1 0 0 0
## LP002785 0 1 0 0
## LP002788 1 0 0 0
## LP002789 1 0 0 0
## LP002792 0 1 0 0
## LP002795 0 0 0 1
## LP002798 1 0 0 0
## LP002804 0 0 0 0
## LP002807 0 0 1 0
## LP002813 0 0 0 0
## LP002820 1 0 0 0
## LP002821 1 0 0 0
## LP002832 0 0 1 0
## LP002836 1 0 0 0
## LP002837 0 0 0 1
## LP002840 0 0 0 0
## LP002841 1 0 0 0
## LP002842 0 1 0 0
## LP002855 0 0 1 0
## LP002862 0 0 1 0
## LP002863 0 0 0 1
## LP002868 0 0 1 0
## LP002874 1 0 0 0
## LP002877 0 1 0 0
## LP002892 0 0 1 0
## LP002893 1 0 0 0
## LP002894 0 0 0 0
## LP002911 0 1 0 0
## LP002912 0 1 0 0
## LP002916 1 0 0 0
## LP002917 0 0 0 0
## LP002926 0 0 1 0
## LP002928 1 0 0 0
## LP002931 0 0 1 0
## LP002936 1 0 0 0
## LP002938 1 0 0 0
## LP002940 1 0 0 0
## LP002941 0 0 1 0
## LP002945 1 0 0 0
## LP002948 0 0 1 0
## LP002953 0 0 0 1
## LP002958 1 0 0 0
## LP002959 0 0 0 0
## LP002961 0 1 0 0
## LP002964 0 0 1 0
## LP002974 1 0 0 0
## LP002978 0 0 0 0
## LP002979 0 0 0 1
## LP002983 0 1 0 0
## LP002984 0 0 1 0
## LP002990 0 0 0 0
3.2 Kuantitatif
3.2.1 Univariat Numerik
a. Measure of Central Tendency
# Keep only the numeric columns of df; these feed the quantitative summaries.
Quan <- select_if(df, is.numeric)
# Show which columns survived the numeric filter.
names(Quan)
## [1] "ApplicantIncome" "CoapplicantIncome" "LoanAmount"
## [4] "Loan_Amount_Term" "Credit_History"
# Measures of central tendency for one numeric column of Quan.
# Quan holds numeric columns only (see names(Quan)), so Self_Employed is not
# part of it: Quan$Self_Employed is NULL and every statistic came back NA/NULL.
# CoapplicantIncome is used instead, matching the Scale section that follows.
# NOTE: the printed results must be regenerated by re-running these lines.
mean(Quan$CoapplicantIncome)
quantile(Quan$CoapplicantIncome)
median(Quan$CoapplicantIncome)
# base::mode() reports the storage mode of an object, not its most frequent
# value, so the statistical mode is read off a frequency table instead.
as.numeric(names(which.max(table(Quan$CoapplicantIncome))))
# Per-column summary (min, quartiles, median, mean, max) of the numeric data.
summary(object = Quan)
## ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term
## Min. : 150 Min. : 0 Min. : 9.0 Min. : 36.0
## 1st Qu.: 2899 1st Qu.: 0 1st Qu.:100.0 1st Qu.:360.0
## Median : 3859 Median : 1084 Median :128.0 Median :360.0
## Mean : 5364 Mean : 1581 Mean :144.7 Mean :342.1
## 3rd Qu.: 5852 3rd Qu.: 2253 3rd Qu.:170.0 3rd Qu.:360.0
## Max. :81000 Max. :33837 Max. :600.0 Max. :480.0
## Credit_History
## Min. :0.0000
## 1st Qu.:1.0000
## Median :1.0000
## Mean :0.8542
## 3rd Qu.:1.0000
## Max. :1.0000
b. Scale
# Dispersion measures for CoapplicantIncome.
var(Quan[["CoapplicantIncome"]]) # variance
## [1] 6852313
sd(Quan[["CoapplicantIncome"]]) # standard deviation
## [1] 2617.692
mad(Quan[["CoapplicantIncome"]]) # median absolute deviation
## [1] 1607.88
IQR(Quan[["CoapplicantIncome"]]) # interquartile range
## [1] 2253.25
c. Skewness
library(e1071) # load e1071
# Skewness of CoapplicantIncome (skewness() comes from e1071, loaded above).
skewness(Quan[["CoapplicantIncome"]])
## [1] 5.844913
d. Kurtosis
# Kurtosis of CoapplicantIncome.
kurtosis(Quan[["CoapplicantIncome"]])
## [1] 56.79679
3.2.2 Bivariat Numerik
a. Covariance
# Sample covariance between co-applicant and applicant income.
with(Quan, cov(CoapplicantIncome, ApplicantIncome))
## [1] -1670551
b. Pearson’s Correlation Coefficient
# Pearson correlation between co-applicant and applicant income.
with(Quan, cor(CoapplicantIncome, ApplicantIncome))
## [1] -0.112588
c. Z-Score
# Standardise CoapplicantIncome: subtract its mean, divide by its sd.
zscore <- (Quan$CoapplicantIncome - mean(Quan$CoapplicantIncome)) /
  sd(Quan$CoapplicantIncome)
3.2.3 Multivariat Numerik
a. Sample Covariance Matrix
# Sample covariance matrix across all numeric columns.
cov(x = Quan)
## ApplicantIncome CoapplicantIncome LoanAmount
## ApplicantIncome 32129072.2408 -1.670551e+06 226029.825404
## CoapplicantIncome -1670550.7308 6.852313e+06 40197.560179
## LoanAmount 226029.8254 4.019756e+04 6481.564505
## Loan_Amount_Term -4006.1953 -9.857739e+02 267.057098
## Credit_History -112.4526 -8.038516e+00 -1.159751
## Loan_Amount_Term Credit_History
## ApplicantIncome -4006.1953027 -112.4526357
## CoapplicantIncome -985.7738706 -8.0385160
## LoanAmount 267.0570981 -1.1597512
## Loan_Amount_Term 4252.6572025 0.7588727
## Credit_History 0.7588727 0.1248260
b. Sample Correlation Matrix
# Sample correlation matrix across all numeric columns.
cor(x = Quan)
## ApplicantIncome CoapplicantIncome LoanAmount
## ApplicantIncome 1.00000000 -0.112587969 0.49530959
## CoapplicantIncome -0.11258797 1.000000000 0.19073974
## LoanAmount 0.49530959 0.190739737 1.00000000
## Loan_Amount_Term -0.01083809 -0.005774688 0.05086675
## Credit_History -0.05615235 -0.008691700 -0.04077297
## Loan_Amount_Term Credit_History
## ApplicantIncome -0.010838092 -0.05615235
## CoapplicantIncome -0.005774688 -0.00869170
## LoanAmount 0.050866753 -0.04077297
## Loan_Amount_Term 1.000000000 0.03293716
## Credit_History 0.032937159 1.00000000
3.3 EDA dengan cara Malas
library(funModeling)
library(tidyverse)
library(Hmisc)
library(skimr)
#' Run a quick, all-in-one exploratory data analysis report.
#'
#' Calls several profiling helpers in sequence on the same data frame.
#'
#' @param data A data frame to profile.
#' @return The result of `describe(data)`, returned visibly, so it is printed
#'   when `basic_eda()` is called at top level.
basic_eda <- function(data) {
  glimpse(data)
  # skim() and profiling_num() hand back their result instead of printing it.
  # Values are only auto-printed at top level, never inside a function body,
  # so without print() both reports were silently dropped.
  print(skim(data))
  df_status(data)
  freq(data)
  print(profiling_num(data))
  plot_num(data)
  # Last expression: becomes the return value of basic_eda().
  describe(data)
}
# Run the combined EDA report on the loan data frame.
basic_eda(data = df)
## Rows: 480
## Columns: 13
## $ Loan_ID <chr> "LP001003", "LP001005", "LP001006", "LP001008", "LP0~
## $ Gender <chr> "Male", "Male", "Male", "Male", "Male", "Male", "Mal~
## $ Married <chr> "Yes", "Yes", "Yes", "No", "Yes", "Yes", "Yes", "Yes~
## $ Dependents <chr> "1", "0", "0", "0", "2", "0", "3+", "2", "1", "2", "~
## $ Education <chr> "Graduate", "Graduate", "Not Graduate", "Graduate", ~
## $ Self_Employed <chr> "No", "Yes", "No", "No", "Yes", "No", "No", "No", "N~
## $ ApplicantIncome <dbl> 4583, 3000, 2583, 6000, 5417, 2333, 3036, 4006, 1284~
## $ CoapplicantIncome <dbl> 1508, 0, 2358, 0, 4196, 1516, 2504, 1526, 10968, 700~
## $ LoanAmount <dbl> 128, 66, 120, 141, 267, 95, 158, 168, 349, 70, 200, ~
## $ Loan_Amount_Term <dbl> 360, 360, 360, 360, 360, 360, 360, 360, 360, 360, 36~
## $ Credit_History <dbl> 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1~
## $ Property_Area <chr> "Rural", "Urban", "Urban", "Urban", "Urban", "Urban"~
## $ Loan_Status <chr> "N", "Y", "Y", "Y", "Y", "Y", "N", "Y", "N", "Y", "Y~
## variable q_zeros p_zeros q_na p_na q_inf p_inf type unique
## 1 Loan_ID 0 0.00 0 0 0 0 character 480
## 2 Gender 0 0.00 0 0 0 0 character 2
## 3 Married 0 0.00 0 0 0 0 character 2
## 4 Dependents 274 57.08 0 0 0 0 character 4
## 5 Education 0 0.00 0 0 0 0 character 2
## 6 Self_Employed 0 0.00 0 0 0 0 character 2
## 7 ApplicantIncome 0 0.00 0 0 0 0 numeric 405
## 8 CoapplicantIncome 216 45.00 0 0 0 0 numeric 232
## 9 LoanAmount 0 0.00 0 0 0 0 numeric 186
## 10 Loan_Amount_Term 0 0.00 0 0 0 0 numeric 9
## 11 Credit_History 70 14.58 0 0 0 0 numeric 2
## 12 Property_Area 0 0.00 0 0 0 0 character 3
## 13 Loan_Status 0 0.00 0 0 0 0 character 2
## Loan_ID frequency percentage cumulative_perc
## 1 LP001003 1 0.21 0.21
## 2 LP001005 1 0.21 0.42
## 3 LP001006 1 0.21 0.63
## 4 LP001008 1 0.21 0.84
## 5 LP001011 1 0.21 1.05
## 6 LP001013 1 0.21 1.26
## 7 LP001014 1 0.21 1.47
## 8 LP001018 1 0.21 1.68
## 9 LP001020 1 0.21 1.89
## 10 LP001024 1 0.21 2.10
## 11 LP001028 1 0.21 2.31
## 12 LP001029 1 0.21 2.52
## 13 LP001030 1 0.21 2.73
## 14 LP001032 1 0.21 2.94
## 15 LP001036 1 0.21 3.15
## 16 LP001038 1 0.21 3.36
## 17 LP001043 1 0.21 3.57
## 18 LP001046 1 0.21 3.78
## 19 LP001047 1 0.21 3.99
## 20 LP001066 1 0.21 4.20
## 21 LP001068 1 0.21 4.41
## 22 LP001073 1 0.21 4.62
## 23 LP001086 1 0.21 4.83
## 24 LP001095 1 0.21 5.04
## 25 LP001097 1 0.21 5.25
## 26 LP001098 1 0.21 5.46
## 27 LP001100 1 0.21 5.67
## 28 LP001112 1 0.21 5.88
## 29 LP001114 1 0.21 6.09
## 30 LP001116 1 0.21 6.30
## 31 LP001119 1 0.21 6.51
## 32 LP001120 1 0.21 6.72
## 33 LP001131 1 0.21 6.93
## 34 LP001138 1 0.21 7.14
## 35 LP001144 1 0.21 7.35
## 36 LP001146 1 0.21 7.56
## 37 LP001151 1 0.21 7.77
## 38 LP001155 1 0.21 7.98
## 39 LP001157 1 0.21 8.19
## 40 LP001164 1 0.21 8.40
## 41 LP001179 1 0.21 8.61
## 42 LP001186 1 0.21 8.82
## 43 LP001194 1 0.21 9.03
## 44 LP001195 1 0.21 9.24
## 45 LP001197 1 0.21 9.45
## 46 LP001198 1 0.21 9.66
## 47 LP001199 1 0.21 9.87
## 48 LP001205 1 0.21 10.08
## 49 LP001206 1 0.21 10.29
## 50 LP001207 1 0.21 10.50
## 51 LP001222 1 0.21 10.71
## 52 LP001225 1 0.21 10.92
## 53 LP001228 1 0.21 11.13
## 54 LP001233 1 0.21 11.34
## 55 LP001238 1 0.21 11.55
## 56 LP001241 1 0.21 11.76
## 57 LP001243 1 0.21 11.97
## 58 LP001245 1 0.21 12.18
## 59 LP001248 1 0.21 12.39
## 60 LP001253 1 0.21 12.60
## 61 LP001255 1 0.21 12.81
## 62 LP001256 1 0.21 13.02
## 63 LP001259 1 0.21 13.23
## 64 LP001263 1 0.21 13.44
## 65 LP001265 1 0.21 13.65
## 66 LP001267 1 0.21 13.86
## 67 LP001275 1 0.21 14.07
## 68 LP001279 1 0.21 14.28
## 69 LP001282 1 0.21 14.49
## 70 LP001289 1 0.21 14.70
## 71 LP001310 1 0.21 14.91
## 72 LP001316 1 0.21 15.12
## 73 LP001318 1 0.21 15.33
## 74 LP001319 1 0.21 15.54
## 75 LP001322 1 0.21 15.75
## 76 LP001325 1 0.21 15.96
## 77 LP001327 1 0.21 16.17
## 78 LP001333 1 0.21 16.38
## 79 LP001334 1 0.21 16.59
## 80 LP001343 1 0.21 16.80
## 81 LP001345 1 0.21 17.01
## 82 LP001349 1 0.21 17.22
## 83 LP001367 1 0.21 17.43
## 84 LP001369 1 0.21 17.64
## 85 LP001379 1 0.21 17.85
## 86 LP001384 1 0.21 18.06
## 87 LP001385 1 0.21 18.27
## 88 LP001401 1 0.21 18.48
## 89 LP001404 1 0.21 18.69
## 90 LP001421 1 0.21 18.90
## 91 LP001422 1 0.21 19.11
## 92 LP001430 1 0.21 19.32
## 93 LP001431 1 0.21 19.53
## 94 LP001432 1 0.21 19.74
## 95 LP001439 1 0.21 19.95
## 96 LP001451 1 0.21 20.16
## 97 LP001473 1 0.21 20.37
## 98 LP001478 1 0.21 20.58
## 99 LP001482 1 0.21 20.79
## 100 LP001487 1 0.21 21.00
## 101 LP001488 1 0.21 21.21
## 102 LP001489 1 0.21 21.42
## 103 LP001491 1 0.21 21.63
## 104 LP001492 1 0.21 21.84
## 105 LP001493 1 0.21 22.05
## 106 LP001497 1 0.21 22.26
## 107 LP001498 1 0.21 22.47
## 108 LP001504 1 0.21 22.68
## 109 LP001507 1 0.21 22.89
## 110 LP001508 1 0.21 23.10
## 111 LP001514 1 0.21 23.31
## 112 LP001516 1 0.21 23.52
## 113 LP001518 1 0.21 23.73
## 114 LP001519 1 0.21 23.94
## 115 LP001520 1 0.21 24.15
## 116 LP001528 1 0.21 24.36
## 117 LP001529 1 0.21 24.57
## 118 LP001531 1 0.21 24.78
## 119 LP001532 1 0.21 24.99
## 120 LP001535 1 0.21 25.20
## 121 LP001536 1 0.21 25.41
## 122 LP001543 1 0.21 25.62
## 123 LP001552 1 0.21 25.83
## 124 LP001560 1 0.21 26.04
## 125 LP001562 1 0.21 26.25
## 126 LP001565 1 0.21 26.46
## 127 LP001570 1 0.21 26.67
## 128 LP001572 1 0.21 26.88
## 129 LP001577 1 0.21 27.09
## 130 LP001578 1 0.21 27.30
## 131 LP001579 1 0.21 27.51
## 132 LP001580 1 0.21 27.72
## 133 LP001586 1 0.21 27.93
## 134 LP001594 1 0.21 28.14
## 135 LP001603 1 0.21 28.35
## 136 LP001606 1 0.21 28.56
## 137 LP001608 1 0.21 28.77
## 138 LP001610 1 0.21 28.98
## 139 LP001616 1 0.21 29.19
## 140 LP001630 1 0.21 29.40
## 141 LP001633 1 0.21 29.61
## 142 LP001636 1 0.21 29.82
## 143 LP001637 1 0.21 30.03
## 144 LP001639 1 0.21 30.24
## 145 LP001640 1 0.21 30.45
## 146 LP001641 1 0.21 30.66
## 147 LP001647 1 0.21 30.87
## 148 LP001653 1 0.21 31.08
## 149 LP001656 1 0.21 31.29
## 150 LP001657 1 0.21 31.50
## 151 LP001658 1 0.21 31.71
## 152 LP001664 1 0.21 31.92
## 153 LP001665 1 0.21 32.13
## 154 LP001666 1 0.21 32.34
## 155 LP001673 1 0.21 32.55
## 156 LP001674 1 0.21 32.76
## 157 LP001677 1 0.21 32.97
## 158 LP001688 1 0.21 33.18
## 159 LP001691 1 0.21 33.39
## 160 LP001692 1 0.21 33.60
## 161 LP001693 1 0.21 33.81
## 162 LP001698 1 0.21 34.02
## 163 LP001699 1 0.21 34.23
## 164 LP001702 1 0.21 34.44
## 165 LP001708 1 0.21 34.65
## 166 LP001711 1 0.21 34.86
## 167 LP001713 1 0.21 35.07
## 168 LP001715 1 0.21 35.28
## 169 LP001716 1 0.21 35.49
## 170 LP001720 1 0.21 35.70
## 171 LP001722 1 0.21 35.91
## 172 LP001726 1 0.21 36.12
## 173 LP001736 1 0.21 36.33
## 174 LP001743 1 0.21 36.54
## 175 LP001744 1 0.21 36.75
## 176 LP001750 1 0.21 36.96
## 177 LP001751 1 0.21 37.17
## 178 LP001758 1 0.21 37.38
## 179 LP001761 1 0.21 37.59
## 180 LP001765 1 0.21 37.80
## 181 LP001776 1 0.21 38.01
## 182 LP001778 1 0.21 38.22
## 183 LP001784 1 0.21 38.43
## 184 LP001790 1 0.21 38.64
## 185 LP001792 1 0.21 38.85
## 186 LP001798 1 0.21 39.06
## 187 LP001800 1 0.21 39.27
## 188 LP001806 1 0.21 39.48
## 189 LP001807 1 0.21 39.69
## 190 LP001811 1 0.21 39.90
## 191 LP001813 1 0.21 40.11
## 192 LP001814 1 0.21 40.32
## 193 LP001819 1 0.21 40.53
## 194 LP001824 1 0.21 40.74
## 195 LP001825 1 0.21 40.95
## 196 LP001835 1 0.21 41.16
## 197 LP001836 1 0.21 41.37
## 198 LP001841 1 0.21 41.58
## 199 LP001843 1 0.21 41.79
## 200 LP001844 1 0.21 42.00
## 201 LP001846 1 0.21 42.21
## 202 LP001849 1 0.21 42.42
## 203 LP001854 1 0.21 42.63
## 204 LP001859 1 0.21 42.84
## 205 LP001868 1 0.21 43.05
## 206 LP001870 1 0.21 43.26
## 207 LP001871 1 0.21 43.47
## 208 LP001872 1 0.21 43.68
## 209 LP001875 1 0.21 43.89
## 210 LP001877 1 0.21 44.10
## 211 LP001882 1 0.21 44.31
## 212 LP001884 1 0.21 44.52
## 213 LP001888 1 0.21 44.73
## 214 LP001891 1 0.21 44.94
## 215 LP001892 1 0.21 45.15
## 216 LP001894 1 0.21 45.36
## 217 LP001896 1 0.21 45.57
## 218 LP001900 1 0.21 45.78
## 219 LP001903 1 0.21 45.99
## 220 LP001904 1 0.21 46.20
## 221 LP001907 1 0.21 46.41
## 222 LP001910 1 0.21 46.62
## 223 LP001914 1 0.21 46.83
## 224 LP001915 1 0.21 47.04
## 225 LP001917 1 0.21 47.25
## 226 LP001924 1 0.21 47.46
## 227 LP001925 1 0.21 47.67
## 228 LP001926 1 0.21 47.88
## 229 LP001931 1 0.21 48.09
## 230 LP001935 1 0.21 48.30
## 231 LP001936 1 0.21 48.51
## 232 LP001938 1 0.21 48.72
## 233 LP001940 1 0.21 48.93
## 234 LP001947 1 0.21 49.14
## 235 LP001953 1 0.21 49.35
## 236 LP001954 1 0.21 49.56
## 237 LP001955 1 0.21 49.77
## 238 LP001963 1 0.21 49.98
## 239 LP001964 1 0.21 50.19
## 240 LP001974 1 0.21 50.40
## 241 LP001977 1 0.21 50.61
## 242 LP001978 1 0.21 50.82
## 243 LP001993 1 0.21 51.03
## 244 LP001994 1 0.21 51.24
## 245 LP001996 1 0.21 51.45
## 246 LP002002 1 0.21 51.66
## 247 LP002004 1 0.21 51.87
## 248 LP002006 1 0.21 52.08
## 249 LP002031 1 0.21 52.29
## 250 LP002035 1 0.21 52.50
## 251 LP002050 1 0.21 52.71
## 252 LP002051 1 0.21 52.92
## 253 LP002053 1 0.21 53.13
## 254 LP002065 1 0.21 53.34
## 255 LP002067 1 0.21 53.55
## 256 LP002068 1 0.21 53.76
## 257 LP002082 1 0.21 53.97
## 258 LP002086 1 0.21 54.18
## 259 LP002087 1 0.21 54.39
## 260 LP002097 1 0.21 54.60
## 261 LP002098 1 0.21 54.81
## 262 LP002112 1 0.21 55.02
## 263 LP002114 1 0.21 55.23
## 264 LP002115 1 0.21 55.44
## 265 LP002116 1 0.21 55.65
## 266 LP002119 1 0.21 55.86
## 267 LP002126 1 0.21 56.07
## 268 LP002129 1 0.21 56.28
## 269 LP002131 1 0.21 56.49
## 270 LP002138 1 0.21 56.70
## 271 LP002139 1 0.21 56.91
## 272 LP002140 1 0.21 57.12
## 273 LP002141 1 0.21 57.33
## 274 LP002142 1 0.21 57.54
## 275 LP002143 1 0.21 57.75
## 276 LP002149 1 0.21 57.96
## 277 LP002151 1 0.21 58.17
## 278 LP002158 1 0.21 58.38
## 279 LP002160 1 0.21 58.59
## 280 LP002161 1 0.21 58.80
## 281 LP002170 1 0.21 59.01
## 282 LP002175 1 0.21 59.22
## 283 LP002180 1 0.21 59.43
## 284 LP002181 1 0.21 59.64
## 285 LP002187 1 0.21 59.85
## 286 LP002190 1 0.21 60.06
## 287 LP002191 1 0.21 60.27
## 288 LP002194 1 0.21 60.48
## 289 LP002197 1 0.21 60.69
## 290 LP002201 1 0.21 60.90
## 291 LP002205 1 0.21 61.11
## 292 LP002211 1 0.21 61.32
## 293 LP002219 1 0.21 61.53
## 294 LP002224 1 0.21 61.74
## 295 LP002225 1 0.21 61.95
## 296 LP002229 1 0.21 62.16
## 297 LP002231 1 0.21 62.37
## 298 LP002234 1 0.21 62.58
## 299 LP002236 1 0.21 62.79
## 300 LP002239 1 0.21 63.00
## 301 LP002244 1 0.21 63.21
## 302 LP002250 1 0.21 63.42
## 303 LP002255 1 0.21 63.63
## 304 LP002262 1 0.21 63.84
## 305 LP002265 1 0.21 64.05
## 306 LP002266 1 0.21 64.26
## 307 LP002277 1 0.21 64.47
## 308 LP002281 1 0.21 64.68
## 309 LP002284 1 0.21 64.89
## 310 LP002287 1 0.21 65.10
## 311 LP002288 1 0.21 65.31
## 312 LP002296 1 0.21 65.52
## 313 LP002297 1 0.21 65.73
## 314 LP002300 1 0.21 65.94
## 315 LP002301 1 0.21 66.15
## 316 LP002305 1 0.21 66.36
## 317 LP002308 1 0.21 66.57
## 318 LP002314 1 0.21 66.78
## 319 LP002315 1 0.21 66.99
## 320 LP002317 1 0.21 67.20
## 321 LP002318 1 0.21 67.41
## 322 LP002328 1 0.21 67.62
## 323 LP002332 1 0.21 67.83
## 324 LP002335 1 0.21 68.04
## 325 LP002337 1 0.21 68.25
## 326 LP002341 1 0.21 68.46
## 327 LP002342 1 0.21 68.67
## 328 LP002345 1 0.21 68.88
## 329 LP002347 1 0.21 69.09
## 330 LP002348 1 0.21 69.30
## 331 LP002361 1 0.21 69.51
## 332 LP002364 1 0.21 69.72
## 333 LP002366 1 0.21 69.93
## 334 LP002367 1 0.21 70.14
## 335 LP002368 1 0.21 70.35
## 336 LP002369 1 0.21 70.56
## 337 LP002370 1 0.21 70.77
## 338 LP002377 1 0.21 70.98
## 339 LP002379 1 0.21 71.19
## 340 LP002387 1 0.21 71.40
## 341 LP002390 1 0.21 71.61
## 342 LP002398 1 0.21 71.82
## 343 LP002403 1 0.21 72.03
## 344 LP002407 1 0.21 72.24
## 345 LP002408 1 0.21 72.45
## 346 LP002409 1 0.21 72.66
## 347 LP002418 1 0.21 72.87
## 348 LP002422 1 0.21 73.08
## 349 LP002429 1 0.21 73.29
## 350 LP002434 1 0.21 73.50
## 351 LP002443 1 0.21 73.71
## 352 LP002446 1 0.21 73.92
## 353 LP002448 1 0.21 74.13
## 354 LP002449 1 0.21 74.34
## 355 LP002453 1 0.21 74.55
## 356 LP002455 1 0.21 74.76
## 357 LP002459 1 0.21 74.97
## 358 LP002467 1 0.21 75.18
## 359 LP002472 1 0.21 75.39
## 360 LP002473 1 0.21 75.60
## 361 LP002484 1 0.21 75.81
## 362 LP002487 1 0.21 76.02
## 363 LP002493 1 0.21 76.23
## 364 LP002494 1 0.21 76.44
## 365 LP002500 1 0.21 76.65
## 366 LP002505 1 0.21 76.86
## 367 LP002515 1 0.21 77.07
## 368 LP002517 1 0.21 77.28
## 369 LP002519 1 0.21 77.49
## 370 LP002524 1 0.21 77.70
## 371 LP002527 1 0.21 77.91
## 372 LP002529 1 0.21 78.12
## 373 LP002531 1 0.21 78.33
## 374 LP002534 1 0.21 78.54
## 375 LP002536 1 0.21 78.75
## 376 LP002537 1 0.21 78.96
## 377 LP002541 1 0.21 79.17
## 378 LP002543 1 0.21 79.38
## 379 LP002544 1 0.21 79.59
## 380 LP002545 1 0.21 79.80
## 381 LP002547 1 0.21 80.01
## 382 LP002555 1 0.21 80.22
## 383 LP002556 1 0.21 80.43
## 384 LP002571 1 0.21 80.64
## 385 LP002582 1 0.21 80.85
## 386 LP002585 1 0.21 81.06
## 387 LP002586 1 0.21 81.27
## 388 LP002587 1 0.21 81.48
## 389 LP002600 1 0.21 81.69
## 390 LP002602 1 0.21 81.90
## 391 LP002603 1 0.21 82.11
## 392 LP002606 1 0.21 82.32
## 393 LP002615 1 0.21 82.53
## 394 LP002619 1 0.21 82.74
## 395 LP002622 1 0.21 82.95
## 396 LP002626 1 0.21 83.16
## 397 LP002634 1 0.21 83.37
## 398 LP002637 1 0.21 83.58
## 399 LP002640 1 0.21 83.79
## 400 LP002643 1 0.21 84.00
## 401 LP002648 1 0.21 84.21
## 402 LP002652 1 0.21 84.42
## 403 LP002659 1 0.21 84.63
## 404 LP002670 1 0.21 84.84
## 405 LP002683 1 0.21 85.05
## 406 LP002684 1 0.21 85.26
## 407 LP002689 1 0.21 85.47
## 408 LP002690 1 0.21 85.68
## 409 LP002692 1 0.21 85.89
## 410 LP002693 1 0.21 86.10
## 411 LP002699 1 0.21 86.31
## 412 LP002705 1 0.21 86.52
## 413 LP002706 1 0.21 86.73
## 414 LP002714 1 0.21 86.94
## 415 LP002716 1 0.21 87.15
## 416 LP002720 1 0.21 87.36
## 417 LP002723 1 0.21 87.57
## 418 LP002731 1 0.21 87.78
## 419 LP002734 1 0.21 87.99
## 420 LP002738 1 0.21 88.20
## 421 LP002739 1 0.21 88.41
## 422 LP002740 1 0.21 88.62
## 423 LP002741 1 0.21 88.83
## 424 LP002743 1 0.21 89.04
## 425 LP002755 1 0.21 89.25
## 426 LP002767 1 0.21 89.46
## 427 LP002768 1 0.21 89.67
## 428 LP002772 1 0.21 89.88
## 429 LP002776 1 0.21 90.09
## 430 LP002777 1 0.21 90.30
## 431 LP002785 1 0.21 90.51
## 432 LP002788 1 0.21 90.72
## 433 LP002789 1 0.21 90.93
## 434 LP002792 1 0.21 91.14
## 435 LP002795 1 0.21 91.35
## 436 LP002798 1 0.21 91.56
## 437 LP002804 1 0.21 91.77
## 438 LP002807 1 0.21 91.98
## 439 LP002813 1 0.21 92.19
## 440 LP002820 1 0.21 92.40
## 441 LP002821 1 0.21 92.61
## 442 LP002832 1 0.21 92.82
## 443 LP002836 1 0.21 93.03
## 444 LP002837 1 0.21 93.24
## 445 LP002840 1 0.21 93.45
## 446 LP002841 1 0.21 93.66
## 447 LP002842 1 0.21 93.87
## 448 LP002855 1 0.21 94.08
## 449 LP002862 1 0.21 94.29
## 450 LP002863 1 0.21 94.50
## 451 LP002868 1 0.21 94.71
## 452 LP002874 1 0.21 94.92
## 453 LP002877 1 0.21 95.13
## 454 LP002892 1 0.21 95.34
## 455 LP002893 1 0.21 95.55
## 456 LP002894 1 0.21 95.76
## 457 LP002911 1 0.21 95.97
## 458 LP002912 1 0.21 96.18
## 459 LP002916 1 0.21 96.39
## 460 LP002917 1 0.21 96.60
## 461 LP002926 1 0.21 96.81
## 462 LP002928 1 0.21 97.02
## 463 LP002931 1 0.21 97.23
## 464 LP002936 1 0.21 97.44
## 465 LP002938 1 0.21 97.65
## 466 LP002940 1 0.21 97.86
## 467 LP002941 1 0.21 98.07
## 468 LP002945 1 0.21 98.28
## 469 LP002948 1 0.21 98.49
## 470 LP002953 1 0.21 98.70
## 471 LP002958 1 0.21 98.91
## 472 LP002959 1 0.21 99.12
## 473 LP002961 1 0.21 99.33
## 474 LP002964 1 0.21 99.54
## 475 LP002974 1 0.21 99.75
## 476 LP002978 1 0.21 99.96
## 477 LP002979 1 0.21 100.17
## 478 LP002983 1 0.21 100.38
## 479 LP002984 1 0.21 100.59
## 480 LP002990 1 0.21 100.00
## Gender frequency percentage cumulative_perc
## 1 Male 394 82.08 82.08
## 2 Female 86 17.92 100.00
## Married frequency percentage cumulative_perc
## 1 Yes 311 64.79 64.79
## 2 No 169 35.21 100.00
## Dependents frequency percentage cumulative_perc
## 1 0 274 57.08 57.08
## 2 2 85 17.71 74.79
## 3 1 80 16.67 91.46
## 4 3+ 41 8.54 100.00
## Education frequency percentage cumulative_perc
## 1 Graduate 383 79.79 79.79
## 2 Not Graduate 97 20.21 100.00
## Self_Employed frequency percentage cumulative_perc
## 1 No 414 86.25 86.25
## 2 Yes 66 13.75 100.00
## Property_Area frequency percentage cumulative_perc
## 1 Semiurban 191 39.79 39.79
## 2 Urban 150 31.25 71.04
## 3 Rural 139 28.96 100.00
## Loan_Status frequency percentage cumulative_perc
## 1 Y 332 69.17 69.17
## 2 N 148 30.83 100.00
## data
##
## 13 Variables 480 Observations
## --------------------------------------------------------------------------------
## Loan_ID
## n missing distinct
## 480 0 480
##
## lowest : LP001003 LP001005 LP001006 LP001008 LP001011
## highest: LP002978 LP002979 LP002983 LP002984 LP002990
## --------------------------------------------------------------------------------
## Gender
## n missing distinct
## 480 0 2
##
## Value Female Male
## Frequency 86 394
## Proportion 0.179 0.821
## --------------------------------------------------------------------------------
## Married
## n missing distinct
## 480 0 2
##
## Value No Yes
## Frequency 169 311
## Proportion 0.352 0.648
## --------------------------------------------------------------------------------
## Dependents
## n missing distinct
## 480 0 4
##
## Value 0 1 2 3+
## Frequency 274 80 85 41
## Proportion 0.571 0.167 0.177 0.085
## --------------------------------------------------------------------------------
## Education
## n missing distinct
## 480 0 2
##
## Value Graduate Not Graduate
## Frequency 383 97
## Proportion 0.798 0.202
## --------------------------------------------------------------------------------
## Self_Employed
## n missing distinct
## 480 0 2
##
## Value No Yes
## Frequency 414 66
## Proportion 0.863 0.138
## --------------------------------------------------------------------------------
## ApplicantIncome
## n missing distinct Info Mean Gmd .05 .10
## 480 0 405 1 5364 4022 1928 2239
## .25 .50 .75 .90 .95
## 2899 3859 5852 9511 14583
##
## lowest : 150 645 1000 1025 1299, highest: 33846 37719 39147 39999 81000
## --------------------------------------------------------------------------------
## CoapplicantIncome
## n missing distinct Info Mean Gmd .05 .10
## 480 0 232 0.909 1581 2068 0 0
## .25 .50 .75 .90 .95
## 0 1084 2253 3797 4996
##
## lowest : 0.00 16.12 189.00 240.00 242.00
## highest: 8980.00 10968.00 11300.00 20000.00 33837.00
## --------------------------------------------------------------------------------
## LoanAmount
## n missing distinct Info Mean Gmd .05 .10
## 480 0 186 1 144.7 77.5 54.95 70.00
## .25 .50 .75 .90 .95
## 100.00 128.00 170.00 234.20 286.20
##
## lowest : 9 17 25 26 30, highest: 495 496 500 570 600
## --------------------------------------------------------------------------------
## Loan_Amount_Term
## n missing distinct Info Mean Gmd
## 480 0 9 0.372 342.1 43.78
##
## lowest : 36 60 84 120 180, highest: 180 240 300 360 480
##
## Value 36 60 84 120 180 240 300 360 480
## Frequency 2 2 3 3 36 2 9 411 12
## Proportion 0.004 0.004 0.006 0.006 0.075 0.004 0.019 0.856 0.025
## --------------------------------------------------------------------------------
## Credit_History
## n missing distinct Info Sum Mean Gmd
## 480 0 2 0.374 410 0.8542 0.2497
##
## --------------------------------------------------------------------------------
## Property_Area
## n missing distinct
## 480 0 3
##
## Value Rural Semiurban Urban
## Frequency 139 191 150
## Proportion 0.290 0.398 0.312
## --------------------------------------------------------------------------------
## Loan_Status
## n missing distinct
## 480 0 2
##
## Value N Y
## Frequency 148 332
## Proportion 0.308 0.692
## --------------------------------------------------------------------------------
Tugas 4
Lakukan pemeriksaan distribusi densitas pada setiap variabel kuantitatif menggunakan R dan Python dengan beberapa bagian sebagai berikut:
4.1 Univariat Numerik
library(ggplot2)
# Load the loan training data from the working directory.
x_train <- read.csv("loan-train.csv")
# Keep only the numeric columns ...
df <- Filter(is.numeric, x_train)
# ... and drop every row that has a missing value in any of them.
df <- na.omit(df)
4.1.1 ApplicantIncome
# Histogram of ApplicantIncome on the density scale (freq = FALSE)
hist(df$ApplicantIncome,
  main = "",
  col = "blue",
  freq = FALSE,
  xlab = ""
)
# ...and overlay a normal density curve using the sample mean and sd
curve(
  dnorm(x, mean = mean(df$ApplicantIncome), sd = sd(df$ApplicantIncome)),
  add = TRUE, col = "black", lwd = 3
)
library(visualize) # distribution visualization
# Partition the graphics device into 2x2 panels, remembering the old layout
old_par <- par(mfrow = c(2, 2))
visualize.norm(
  stat = 1, mu = mean(df$ApplicantIncome), sd = sd(df$ApplicantIncome),
  section = "lower"
) # evaluates lower tail
visualize.norm(
  stat = c(10, 5000), mu = mean(df$ApplicantIncome),
  sd = sd(df$ApplicantIncome), section = "bounded"
) # evaluates bounded region
visualize.norm(
  stat = 1, mu = mean(df$ApplicantIncome), sd = sd(df$ApplicantIncome),
  section = "upper"
) # evaluates upper tail
# Restore the previous layout so later plots are not drawn into the 2x2 grid
par(old_par)
4.1.2 CoapplicantIncome
# Histogram of CoapplicantIncome on the density scale (freq = FALSE)
hist(df$CoapplicantIncome,
  main = "",
  col = "blue",
  freq = FALSE,
  xlab = ""
)
# ...and overlay a normal density curve using the sample mean and sd
curve(
  dnorm(x, mean = mean(df$CoapplicantIncome), sd = sd(df$CoapplicantIncome)),
  add = TRUE, col = "black", lwd = 3
)
# Partition the graphics device into 2x2 panels, remembering the old layout
old_par <- par(mfrow = c(2, 2))
visualize.norm(
  stat = 1, mu = mean(df$CoapplicantIncome), sd = sd(df$CoapplicantIncome),
  section = "lower"
) # evaluates lower tail
visualize.norm(
  stat = c(10, 2500), mu = mean(df$CoapplicantIncome),
  sd = sd(df$CoapplicantIncome), section = "bounded"
) # evaluates bounded region
visualize.norm(
  stat = 1, mu = mean(df$CoapplicantIncome), sd = sd(df$CoapplicantIncome),
  section = "upper"
) # evaluates upper tail
# Restore the previous layout so later plots are not drawn into the 2x2 grid
par(old_par)
4.1.3 LoanAmount
# Histogram of LoanAmount on the density scale
loan_mean <- mean(df$LoanAmount)
loan_sd <- sd(df$LoanAmount)
hist(
  df$LoanAmount,
  freq = FALSE,
  col = "blue",
  main = "",
  xlab = ""
)
# Overlay a normal density that uses the sample mean and standard deviation
curve(
  dnorm(x, mean = loan_mean, sd = loan_sd),
  col = "black",
  lwd = 3,
  add = TRUE
)
par(mfrow = c(2, 2)) # lay the following plots out on a 2x2 grid
# Lower tail at stat = 1
visualize.norm(
  stat = 1, mu = loan_mean, sd = loan_sd, section = "lower"
)
# Bounded region between 10 and 100
visualize.norm(
  stat = c(10, 100), mu = loan_mean, sd = loan_sd, section = "bounded"
)
# Upper tail at stat = 1
visualize.norm(
  stat = 1, mu = loan_mean, sd = loan_sd, section = "upper"
)
4.1.4 Loan_Amount_Term
# Histogram of Loan_Amount_Term on the density scale
term_mean <- mean(df$Loan_Amount_Term)
term_sd <- sd(df$Loan_Amount_Term)
hist(
  df$Loan_Amount_Term,
  freq = FALSE,
  col = "blue",
  main = "",
  xlab = ""
)
# Overlay a normal density that uses the sample mean and standard deviation
curve(
  dnorm(x, mean = term_mean, sd = term_sd),
  col = "black",
  lwd = 3,
  add = TRUE
)
par(mfrow = c(2, 2)) # lay the following plots out on a 2x2 grid
# Lower tail at stat = 1
visualize.norm(
  stat = 1, mu = term_mean, sd = term_sd, section = "lower"
)
# Bounded region between 100 and 300
visualize.norm(
  stat = c(100, 300), mu = term_mean, sd = term_sd, section = "bounded"
)
# Upper tail at stat = 1
visualize.norm(
  stat = 1, mu = term_mean, sd = term_sd, section = "upper"
)
4.1.5 Credit_History
# Histogram of Credit_History on the density scale
credit_mean <- mean(df$Credit_History)
credit_sd <- sd(df$Credit_History)
hist(
  df$Credit_History,
  freq = FALSE,
  col = "blue",
  main = "",
  xlab = ""
)
# Overlay a normal density that uses the sample mean and standard deviation
curve(
  dnorm(x, mean = credit_mean, sd = credit_sd),
  col = "black",
  lwd = 3,
  add = TRUE
)
par(mfrow = c(2, 2)) # lay the following plots out on a 2x2 grid
# Lower tail at stat = 1
visualize.norm(
  stat = 1, mu = credit_mean, sd = credit_sd, section = "lower"
)
# Bounded region between 0.5 and 1
visualize.norm(
  stat = c(0.5, 1), mu = credit_mean, sd = credit_sd, section = "bounded"
)
# Upper tail at stat = 1
visualize.norm(
  stat = 1, mu = credit_mean, sd = credit_sd, section = "upper"
)
4.2 Bivariat Numerik
4.2.1 ApplicantIncome dan CoapplicantIncome
library(ggplot2)
# Scatter plot of applicant vs. co-applicant income with 2D density
# contours drawn on top of the points.
income_aes <- aes(x = ApplicantIncome, y = CoapplicantIncome)
p <- ggplot(data = df, mapping = income_aes) +
  geom_point(alpha = 0.4) +
  geom_density_2d()
p
4.2.2 ApplicantIncome dan LoanAmount
library(ggplot2)
# Scatter plot of applicant income vs. loan amount with 2D density
# contours drawn on top of the points.
income_loan_aes <- aes(x = ApplicantIncome, y = LoanAmount)
p <- ggplot(data = df, mapping = income_loan_aes) +
  geom_point(alpha = 0.4) +
  geom_density_2d()
p
4.2.3 CoapplicantIncome dan LoanAmount
library(ggplot2)
# Scatter plot of co-applicant income vs. loan amount with 2D density
# contours drawn on top of the points.
coapp_loan_aes <- aes(x = CoapplicantIncome, y = LoanAmount)
p <- ggplot(data = df, mapping = coapp_loan_aes) +
  geom_point(alpha = 0.4) +
  geom_density_2d()
p
4.3 Multivariat Numerik
library(ggplot2)
# Income scatter plot with the colour aesthetic mapped to LoanAmount,
# plus a 2D density contour layer.
p2 <- ggplot(
  data = df,
  mapping = aes(
    x = ApplicantIncome,
    y = CoapplicantIncome,
    color = LoanAmount
  )
) +
  geom_point(alpha = 0.7) +
  geom_density_2d()
p2
Tugas 5
Lakukan proses pengujian Hipotesis menggunakan R dan Python pada setiap variabel kuantitatif dengan beberapa bagian sebagai berikut:
5.1 Margin of error dan Estimasi interval
Hitunglah margin of error dan estimasi interval untuk proporsi peminjam berjenis kelamin perempuan pada tingkat kepercayaan 95%.
library(MASS)
df <- read.csv("loan-train.csv")
# Number of rows and number of applicants whose Gender is "Female"
n <- nrow(df)
k <- sum(df$Gender == "Female")
# Sample proportion and its standard error
pbar <- k / n
SE <- sqrt(pbar * (1 - pbar) / n)
SE # standard error
## [1] 0.01558505
n
## [1] 614
k
## [1] 112
# Margin of error at the 95% confidence level
E <- qnorm(0.975) * SE
E
## [1] 0.03054614
prop.test(k, n) # interval estimate of the proportion
##
## 1-sample proportions test with continuity correction
##
## data: k out of n, null probability 0.5
## X-squared = 246.45, df = 1, p-value < 2.2e-16
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
## 0.1531133 0.2157616
## sample estimates:
## p
## 0.1824104
5.2 Ukuran sampel
Jika Anda berencana menggunakan perkiraan proporsi 50% data konsumen berjenis kelamin perempuan, temukan ukuran sampel yang diperlukan untuk mencapai margin kesalahan 5% untuk data observasi pada tingkat kepercayaan 95%.
# Minimum sample size needed to estimate a proportion within margin of
# error E at the 95% confidence level: n = z^2 * p * (1 - p) / E^2.
Quan <- qnorm(0.975) # standard normal quantile for 95% confidence
p <- 0.5 # planned proportion estimate (50%)
E <- 0.05 # target margin of error (5%)
# A sample size must be a whole number, and rounding down would miss the
# target margin of error, so the raw value (384.1459) is rounded up.
n_required <- ceiling(Quan^2 * p * (1 - p) / E^2)
n_required # required sample size
## [1] 385
5.3 Pembuktian kebenaran asumsi dengan pinjaman rata-rata konsumen
Lakukan pembuktian kebenaran asumsi dengan tingkat signifikansi 0.05, jika Bank mengklaim bahwa pinjaman rata-rata konsumen adalah:
x_train <- read.csv("loan-train.csv")
# Loan amounts with the missing entries removed
df <- na.omit(x_train$LoanAmount)
alpha <- 0.05 # significance level
mu0 <- 150 # hypothesized mean
n <- length(df) # sample size
n
## [1] 592
xbar <- mean(df) # sample mean
xbar
## [1] 146.4122
s <- sd(df) # sample standard deviation
s
## [1] 85.58733
# Test statistic: distance of the sample mean from mu0 in standard errors
std_error <- s / sqrt(n)
z <- (xbar - mu0) / std_error
z # test statistic
## [1] -1.019963
5.3.1 Lebih besar $ 150
# Right-tailed test: standard normal quantile at 1 - alpha
z.alpha <- qnorm(1 - alpha)
z.alpha # right-tail critical value
## [1] 1.644854
5.3.2 Lebih kecil $ 150
# Left-tailed test: the critical value is the negated right-tail value
-z.alpha # left tail critical value
## [1] -1.644854
5.3.3 Sama dengan $ 150
# Two-tailed test: alpha is split evenly between the two tails
z.half.alpha <- qnorm(1 - alpha / 2)
c(-1, 1) * z.half.alpha # lower and upper critical values
## [1] -1.959964 1.959964
5.4 Pembuktian kebenaran asumsi dengan simpangan baku pinjaman
Lakukan pembuktian kebenaran asumsi dengan tingkat signifikansi 0.05, seperti di atas, jika diketahui simpangan baku pinjaman adalah $ 85.
x_train <- read.csv("loan-train.csv")
# Loan amounts with the missing entries removed
df <- na.omit(x_train$LoanAmount)
alpha <- 0.05 # significance level
mu0 <- 150 # hypothesized mean
n <- length(df) # sample size
n
## [1] 592
xbar <- mean(df) # sample mean
xbar
## [1] 146.4122
s <- 85 # given standard deviation
s
## [1] 85
# Test statistic: distance of the sample mean from mu0 in standard errors
std_error <- s / sqrt(n)
z <- (xbar - mu0) / std_error
z # test statistic
## [1] -1.02701
5.4.1 Lebih besar $ 150
# Right-tailed test: standard normal quantile at 1 - alpha
z.alpha <- qnorm(1 - alpha)
z.alpha # right-tail critical value
## [1] 1.644854
5.4.2 Lebih kecil $ 150
# Left-tailed test: the critical value is the negated right-tail value
-z.alpha # left tail critical value
## [1] -1.644854
5.4.3 Sama dengan $ 150
# Two-tailed test: alpha is split evenly between the two tails
z.half.alpha <- qnorm(1 - alpha / 2)
c(-1, 1) * z.half.alpha # lower and upper critical values
## [1] -1.959964 1.959964
Referensi
1. Pertemuan 3 2. Pertemuan 4 3. Pertemuan 5 4. Pertemuan 6 5. Pertemuan 7