Computational Statistics
Mid Term - Week 8
Email : je070601@gmail.com
RPubs : https://rpubs.com/invokerarts/
Github : https://github.com/invokerarts
Majors : Business Statistics
Address : ARA Center, Matana University Tower Jl. CBD Barat Kav, RT.1, Curug Sangereng,
Kelapa Dua, Tangerang, Banten 15810.
library(dplyr)
library(quantable)
library(naniar)
library(ggplot2)
library(e1071)
library(ggpubr)
library(ks)
1 Tugas 1
1.1 Import Data
# Load the loan training data from a comma-separated file (relative path).
# NOTE(review): later output shows the first column named `ï..Loan_ID`, which
# suggests the file starts with a UTF-8 byte-order mark — confirm, and consider
# fileEncoding = "UTF-8-BOM" (downstream code references the mangled name).
DataUTS <- read.csv("loan-train.csv", sep = ",")
DataUTS
1.2 Penanganan Data Hilang
ColNV <- unique(names(DataUTS)[col(DataUTS)[which(DataUTS == "")]])
ColNV## [1] "Gender" "Married" "Dependents" "Self_Employed"
# Turn empty strings into NA in the four categorical columns listed in ColNV,
# so that later steps can treat them as ordinary missing values.
DataUTSwithNA <- mutate(
  DataUTS,
  Gender = replace(Gender, Gender == "", NA),
  Married = replace(Married, Married == "", NA),
  Dependents = replace(Dependents, Dependents == "", NA),
  Self_Employed = replace(Self_Employed, Self_Employed == "", NA)
)
# Names of every column that contains at least one NA.
# vapply() checks each column in place with a guaranteed logical result;
# apply() would first coerce the whole data frame to a character matrix.
ColNA <- colnames(DataUTSwithNA)[vapply(DataUTSwithNA, anyNA, logical(1))]
ColNA## [1] "Gender" "Married" "Dependents" "Self_Employed"
## [5] "LoanAmount" "Loan_Amount_Term" "Credit_History"
# Return the most frequent value(s) of a vector.
#
# x      A vector.
# na.rm  If TRUE, NA entries are dropped before counting, so NA can never be
#        reported as a mode. Defaults to FALSE, which counts NA like any
#        other value (the original behaviour).
#
# Returns every value tied for the highest count, in order of first
# appearance; callers that need a single value should take the first element.
modes <- function(x, na.rm = FALSE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  ux <- unique(x)
  # match() maps each element to its position in ux; tabulate() counts them.
  tab <- tabulate(match(x, ux))
  ux[tab == max(tab)]
}
# Impute missing values: the numeric column gets its mean, the categorical
# columns get their mode.
# modes() returns every tied value, so `[1]` keeps the replacement a single
# value instead of letting replace()/ifelse() recycle several modes across the
# gaps. na.omit() stops NA itself from being counted as a candidate mode.
NoNAData <- DataUTSwithNA %>%
  mutate(
    LoanAmount = ifelse(
      is.na(LoanAmount),
      mean(LoanAmount, na.rm = TRUE),
      LoanAmount
    ),
    # Term and credit history are codes rather than quantities, so they are
    # stored as character after imputation.
    Loan_Amount_Term = as.character(ifelse(
      is.na(Loan_Amount_Term),
      modes(na.omit(Loan_Amount_Term))[1],
      Loan_Amount_Term
    )),
    Credit_History = as.character(ifelse(
      is.na(Credit_History),
      modes(na.omit(Credit_History))[1],
      Credit_History
    )),
    Gender = replace(Gender, is.na(Gender), modes(na.omit(Gender))[1]),
    Married = replace(Married, is.na(Married), modes(na.omit(Married))[1]),
    Dependents = replace(
      Dependents, is.na(Dependents), modes(na.omit(Dependents))[1]
    ),
    Self_Employed = replace(
      Self_Employed, is.na(Self_Employed), modes(na.omit(Self_Employed))[1]
    )
  )
NoNAData
1.3 Periksa Data Duplikat
data.frame(
Jumlah_Data = NoNAData %>% nrow (),
Jumlah_Data_Unik = NoNAData %>% distinct() %>% nrow()
)1.4 Pemisahan Data Kategori dan Numerik
DataKat <- select_if(NoNAData, is.character)
DataNum <- select_if(NoNAData, is.numeric)
DataKat
DataNum
1.5 Penanganan Data Numerik
1.5.1 Standarisasi
# Standardise a numeric vector to z-scores (mean 0, standard deviation 1).
#
# x  A numeric vector without missing values.
#
# Returns a numeric vector the same length as x. The previous body was a copy
# of the min-max formula used by normalize(), which is not standardisation.
Standarize <- function(x) {
  (x - mean(x)) / sd(x)
}
DataStandarisasi <- as.data.frame(lapply(DataNum, scale))
DataStandarisasi
1.5.2 Normalisasi
# Min-max rescaling: map the smallest value of x to 0 and the largest to 1.
#
# x  A numeric vector.
#
# Returns a numeric vector the same length as x.
normalize <- function(x) {
  lowest <- min(x)
  highest <- max(x)
  (x - lowest) / (highest - lowest)
}
DataNormalisasi <- as.data.frame(lapply(DataNum, normalize))
DataNormalisasi
1.5.3 Penskalaan Robust
# Robust scaling: centre x on its median and divide by its interquartile
# range (75th percentile minus 25th percentile).
#
# x  A numeric vector.
#
# Returns a numeric vector the same length as x.
robust_scale <- function(x) {
  quartiles <- quantile(x, probs = c(.25, .75))
  centred <- x - median(x)
  centred / (quartiles[2] - quartiles[1])
}
DataRobustScale <- as.data.frame(lapply(DataNum,robust_scale))
DataRobustScale
1.6 Penanganan Data Pencilan
# Flag the elements of x that fall outside the 1.5 * IQR fences.
#
# x  A numeric vector.
#
# Returns a logical vector the same length as x: TRUE where the value is above
# Q3 + 1.5 * IQR or below Q1 - 1.5 * IQR.
outliers <- function(x) {
  quartiles <- quantile(x, probs = c(.25, .75))
  spread <- quartiles[2] - quartiles[1]
  too_high <- x > quartiles[2] + (spread * 1.5)
  too_low <- x < quartiles[1] - (spread * 1.5)
  too_high | too_low
}
# Drop every row that is an IQR outlier in any of the given columns.
#
# df    A data frame.
# cols  Names of the columns to screen; defaults to all columns of df.
#
# Columns are processed in order, and each column's fences are computed on the
# rows that survived the previous columns. drop = FALSE keeps the result a
# data frame even when df has a single column (otherwise the subset collapses
# to a bare vector).
remove_outliers <- function(df, cols = names(df)) {
  for (col in cols) {
    df <- df[!outliers(df[[col]]), , drop = FALSE]
  }
  df
}

# Flag values outside the 1.5 * IQR fences. This repeats the rule defined
# earlier in the document, as the original does.
#
# x  A numeric vector.
#
# Returns a logical vector the same length as x.
outliers <- function(x) {
  Q1 <- quantile(x, probs = .25)
  Q3 <- quantile(x, probs = .75)
  iqr <- Q3 - Q1
  upper_limit <- Q3 + (iqr * 1.5)
  lower_limit <- Q1 - (iqr * 1.5)
  x > upper_limit | x < lower_limit
}
# Collect the rows of DataNum that outliers() flags in each numeric column.
# NOTE(review): the object names do not line up with the columns they are
# built from (Oap comes from ApplicantIncome, but Oco from LoanAmount and Ola
# from CoapplicantIncome) — confirm the intended naming. In the visible code
# only their row-bound union is used afterwards.
Oco <- subset(DataNum, outliers(DataNum$LoanAmount))
Oap <- subset(DataNum, outliers(DataNum$ApplicantIncome))
Ola <- subset(DataNum, outliers(DataNum$CoapplicantIncome))
# Stack the three subsets; a row flagged in more than one column appears once
# per column it was flagged in.
AllOutliers <- rbind(Oco,Oap,Ola)
data.frame(
Jumlah_Outliers = AllOutliers %>% nrow (),
Jumlah_Outliers_Unik = AllOutliers %>% distinct() %>% nrow()
)UniqOurliers <- AllOutliers %>% distinct
NoOutliers <- NoNAData %>% anti_join(UniqOurliers)
NoOutliers
1.7 Penanganan Data Kategorikal
1.7.1 Mengubah label
DataKat %>% summarise_all(n_distinct)GenderLabel <-factor(DataKat$Gender, labels=c(0, 1)) # 0=Female 1=Male
# Numeric-looking codes for each categorical column. factor() orders the
# levels of a character vector by sorting its distinct values, so the codes
# follow that sorted order (strings, not numbers: "36" sorts after "300").
MarriedLabel <- factor(DataKat$Married, labels = c(0, 1)) # 0=No 1=Yes
DependentsLabel <- factor(DataKat$Dependents, labels = c(0, 1, 2, 3)) # 0=0 1=1 2=2 3=3+
EducationLabel <- factor(DataKat$Education, labels = c(0, 1)) # 0=Graduate 1=Not Graduate
Self_EmployedLabel <- factor(DataKat$Self_Employed, labels = c(0, 1)) # 0=No 1=Yes
# 0=12 1=120 2=180 3=240 4=300 5=36 6=360 7=480 8=60 9=84
Loan_Amount_TermLabel <- factor(
  DataKat$Loan_Amount_Term,
  labels = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)
)
Credit_HistoryLabel <- factor(DataKat$Credit_History, labels = c(0, 1)) # 0=No 1=Yes
Property_AreaLabel <- factor(DataKat$Property_Area, labels = c(0, 1, 2)) # 0=Rural 1=Semiurban 2=Urban
Loan_StatusLabel <- factor(DataKat$Loan_Status, labels = c(0, 1)) # 0=N 1=Y

# Find the ID column by its suffix instead of the literal name `ï..Loan_ID`.
# That name is an artefact of how the file's leading bytes were decoded and
# can come out differently on another system; `$` with a name that does not
# exist returns NULL, which data.frame() would silently drop.
DataKatLabeled <- data.frame(
  "ID" = DataKat[[grep("Loan_ID$", names(DataKat))[1]]],
  GenderLabel,
  MarriedLabel,
  DependentsLabel,
  EducationLabel,
  Self_EmployedLabel,
  Loan_Amount_TermLabel,
  Credit_HistoryLabel,
  Property_AreaLabel,
  Loan_StatusLabel
)
DataKatLabeled
2 Tugas 2
Lakukan Proses Visualisasi Data dengan menggunakan R dengan beberapa langkah berikut:
2.1 Visualisasi Univariabel
2.1.1 Data Kategorikal
library(patchwork)
plot1_G <- ggplot(NoOutliers, aes(x = Gender))+geom_bar()
plot1_M <- ggplot(NoOutliers, aes(x = Married))+geom_bar()
plot1_D <- ggplot(NoOutliers, aes(x = Dependents))+geom_bar()
plot1_E <- ggplot(NoOutliers, aes(x = Education))+geom_bar()
plot1_SE <- ggplot(NoOutliers, aes(x = Self_Employed))+geom_bar()
plot1_LAT <- ggplot(NoOutliers, aes(x = Loan_Amount_Term))+geom_bar()+theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=0.5))
plot1_CH <- ggplot(NoOutliers, aes(x = Credit_History))+geom_bar()
plot1_PA <- ggplot(NoOutliers, aes(x = Property_Area))+geom_bar()
plot1_LS <- ggplot(NoOutliers, aes(x = Loan_Status))+geom_bar()
ggarrange(plot1_G, plot1_M, plot1_D, plot1_E, plot1_SE, plot1_LAT, plot1_CH, plot1_PA, plot1_LS)2.1.2 Data Numerikal
plot1_AI <- ggplot(NoOutliers, aes(x = ApplicantIncome))+geom_histogram(bins = 12, colour = "white")
plot1_CI <- ggplot(NoOutliers, aes(x = CoapplicantIncome))+geom_histogram(bins = 12, colour = "white")
plot1_LA <- ggplot(NoOutliers, aes(x = LoanAmount))+geom_histogram(bins = 12, colour = "white")
ggarrange(plot1_AI, plot1_CI, plot1_LA)2.2 Visualisasi Bivariabel
2.2.1 Kategorikal vs Kategorikal
plot2_G_M <- ggplot(NoOutliers, aes(x = Gender, fill = Married)) +
theme_minimal() + # use a minimal theme
geom_bar(position = position_dodge(preserve = "single"))
plot2_G_E <- ggplot(NoOutliers, aes(x = Gender, fill = Education)) +
theme_minimal() + # use a minimal theme
geom_bar(position = position_dodge(preserve = "single"))
plot2_M_E <- ggplot(NoOutliers, aes(x = Married, fill = Education)) +
theme_minimal() + # use a minimal theme
geom_bar(position = position_dodge(preserve = "single"))
Plot2_E_PA <- ggplot(NoOutliers, aes(x = Education, fill = Property_Area)) +
theme_minimal() + # use a minimal theme
geom_bar(position = position_dodge(preserve = "single"))
ggarrange(plot2_G_M, plot2_G_E, plot2_M_E, Plot2_E_PA)2.2.2 Data Numerikal vs Numerikal
plot2_LA_CI <- ggplot(NoOutliers, aes(x = LoanAmount, y = CoapplicantIncome )) +
theme_minimal() + # use a minimal theme
geom_line()
plot2_LA_AI <- ggplot(NoOutliers, aes(x = LoanAmount, y = ApplicantIncome )) +
theme_minimal() + # use a minimal theme
geom_line()
plot2_AI_CI <- ggplot(NoOutliers, aes(x = ApplicantIncome, y = CoapplicantIncome )) +
theme_minimal() + # use a minimal theme
geom_line()
plot2_CI_LA <- ggplot(NoOutliers, aes(x = CoapplicantIncome, y = LoanAmount )) +
theme_minimal() + # use a minimal theme
geom_line()
plot2_AI_LA <- ggplot(NoOutliers, aes(x = ApplicantIncome, y = LoanAmount )) +
theme_minimal() + # use a minimal theme
geom_line()
plot2_CI_AI <- ggplot(NoOutliers, aes(x = CoapplicantIncome, y = ApplicantIncome )) +
theme_minimal() + # use a minimal theme
geom_line()
ggarrange(plot2_LA_CI, plot2_LA_AI, plot2_AI_CI, plot2_CI_LA, plot2_AI_LA, plot2_CI_AI)2.2.3 Data Numerikal vs Kategorikal
# Density of LoanAmount, one translucent curve per Loan_Status value.
# The title now names the variable actually mapped to `fill` (Loan_Status);
# it previously said "Loan Amount Term".
plot2_LA_LS <- ggplot(
  NoOutliers,
  aes(x = LoanAmount, fill = Loan_Status)
) +
  geom_density(alpha = 0.3) +
  theme_minimal() +
  labs(title = "Loan Amount distribution by Loan Status")
plot2_CI_M <- ggplot(NoOutliers,
aes(x = CoapplicantIncome,
fill = Married)) +
geom_density(alpha = 0.3) +
theme_minimal() +
labs(title = "Coapplicant Income distribution by Married")
plot2_AI_E <- ggplot(NoOutliers,
aes(x = ApplicantIncome,
fill = Education)) +
geom_density(alpha = 0.3) +
theme_minimal() +
labs(title = "Applicant Income distribution by Education")
plot2_LA_PA <- ggplot(NoOutliers,
aes(x = ApplicantIncome,
fill = Property_Area)) +
geom_density(alpha = 0.3) +
theme_minimal() +
labs(title = "Applicant Income distribution by Property Area")
ggarrange(plot2_LA_LS, plot2_CI_M, plot2_AI_E, plot2_LA_PA)2.3 Visualisasi Multivariabel
plot3_AI_CI_PA <-ggplot(NoOutliers, aes(x=ApplicantIncome, y=CoapplicantIncome, shape=Property_Area, colour=Property_Area))+geom_point()
plot3_AI_LA_PA <-ggplot(NoOutliers, aes(x=ApplicantIncome, y=LoanAmount, shape=Property_Area, colour=Property_Area))+geom_point()
plot3_LA_CI_PA <-ggplot(NoOutliers, aes(x=LoanAmount, y=CoapplicantIncome, shape=Property_Area, colour=Property_Area))+geom_point()
plot3_AI_CI_LAT <-ggplot(NoOutliers, aes(x=ApplicantIncome, y=CoapplicantIncome, shape=Loan_Amount_Term, colour=Property_Area))+geom_point()
plot3_AI_LA_LAT <-ggplot(NoOutliers, aes(x=ApplicantIncome, y=LoanAmount, shape=Loan_Amount_Term, colour=Property_Area))+geom_point()
plot3_LA_CI_LAT <-ggplot(NoOutliers, aes(x=LoanAmount, y=CoapplicantIncome, shape=Loan_Amount_Term, colour=Property_Area))+geom_point()
plot3_AI_CI_E <-ggplot(NoOutliers, aes(x=ApplicantIncome, y=CoapplicantIncome, shape=Education, colour=Property_Area))+geom_point()
plot3_AI_LA_E <-ggplot(NoOutliers, aes(x=ApplicantIncome, y=LoanAmount, shape=Education, colour=Property_Area))+geom_point()
plot3_LA_CI_E <-ggplot(NoOutliers, aes(x=LoanAmount, y=CoapplicantIncome, shape=Education, colour=Property_Area))+geom_point()
plot3_AI_CI_PA
plot3_AI_LA_PA
plot3_LA_CI_PA
plot3_AI_CI_LAT
plot3_AI_LA_LAT
plot3_LA_CI_LAT
plot3_AI_CI_E
plot3_AI_LA_E
plot3_LA_CI_E
3 Tugas 3
Lakukan proses analisa data secara deskriptif menggunakan R dengan beberapa langkah berikut:
3.1 Kualitatif
3.1.1 Kategori Univariat
prop.table(table(NoOutliers$Gender))##
## Female Male
## 0.1869159 0.8130841
prop.table(table(NoOutliers$Married))##
## No Yes
## 0.3495327 0.6504673
prop.table(table(NoOutliers$Dependents))##
## 0 1 2 3+
## 0.60000000 0.15700935 0.16822430 0.07476636
prop.table(table(NoOutliers$Education))##
## Graduate Not Graduate
## 0.7551402 0.2448598
prop.table(table(NoOutliers$Self_Employed))##
## No Yes
## 0.8859813 0.1140187
prop.table(table(NoOutliers$Loan_Amount_Term))##
## 12 120 180 240 300 36
## 0.001869159 0.005607477 0.067289720 0.007476636 0.018691589 0.003738318
## 360 480 60 84
## 0.857943925 0.026168224 0.003738318 0.007476636
prop.table(table(NoOutliers$Credit_History))##
## 0 1
## 0.1457944 0.8542056
prop.table(table(NoOutliers$Property_Area))##
## Rural Semiurban Urban
## 0.2990654 0.3831776 0.3177570
prop.table(table(NoOutliers$Loan_Status))##
## N Y
## 0.3046729 0.6953271
3.1.2 Kategori Bivariat
library(dplyr) # for data manipulation
library(magrittr) # for data manipulation similar to dplyr
NoOutliers %>% select(Gender, Married) %>% table() # load the data and select vectors into matrix and inspect## Married
## Gender No Yes
## Female 72 28
## Male 115 320
NoOutliers %>% select(Gender, Education) %>% table() # load the data and select vectors into matrix and inspect ## Education
## Gender Graduate Not Graduate
## Female 82 18
## Male 322 113
NoOutliers %>% select(Gender, Property_Area) %>% table() # load the data and select vectors into matrix and inspect ## Property_Area
## Gender Rural Semiurban Urban
## Female 24 49 27
## Male 136 156 143
NoOutliers %>% select(Education, Self_Employed) %>% table() # load the data and select vectors into matrix and inspect ## Self_Employed
## Education No Yes
## Graduate 358 46
## Not Graduate 116 15
NoOutliers %>% select(Gender, Loan_Amount_Term) %>% table() # load the data and select vectors into matrix and inspect ## Loan_Amount_Term
## Gender 12 120 180 240 300 36 360 480 60 84
## Female 0 0 2 1 1 1 90 4 0 1
## Male 1 3 34 3 9 1 369 10 2 3
NoOutliers %>% select(Married, Loan_Amount_Term) %>% table() # load the data and select vectors into matrix and inspect ## Loan_Amount_Term
## Married 12 120 180 240 300 36 360 480 60 84
## No 0 1 7 1 3 2 164 8 1 0
## Yes 1 2 29 3 7 0 295 6 1 4
3.1.3 Kategori Multivariat
NoOutliers %>% select(Gender, Married, Education) %>% ftable()## Education Graduate Not Graduate
## Gender Married
## Female No 60 12
## Yes 22 6
## Male No 84 31
## Yes 238 82
NoOutliers %>% select(Gender, Married, Education, Property_Area, Loan_Amount_Term) %>% ftable()## Loan_Amount_Term 12 120 180 240 300 36 360 480 60 84
## Gender Married Education Property_Area
## Female No Graduate Rural 0 0 0 0 0 0 13 2 0 0
## Semiurban 0 0 0 0 1 1 23 0 0 0
## Urban 0 0 1 0 0 0 18 1 0 0
## Not Graduate Rural 0 0 0 0 0 0 4 0 0 0
## Semiurban 0 0 0 0 0 0 5 0 0 0
## Urban 0 0 0 0 0 0 3 0 0 0
## Yes Graduate Rural 0 0 0 0 0 0 3 0 0 0
## Semiurban 0 0 1 1 0 0 11 1 0 1
## Urban 0 0 0 0 0 0 4 0 0 0
## Not Graduate Rural 0 0 0 0 0 0 2 0 0 0
## Semiurban 0 0 0 0 0 0 4 0 0 0
## Urban 0 0 0 0 0 0 0 0 0 0
## Male No Graduate Rural 0 0 0 0 0 0 27 0 0 0
## Semiurban 0 0 1 0 1 0 23 2 0 0
## Urban 0 0 3 0 0 0 24 2 1 0
## Not Graduate Rural 0 0 0 0 1 0 12 0 0 0
## Semiurban 0 1 0 0 0 1 9 0 0 0
## Urban 0 0 2 1 0 0 3 1 0 0
## Yes Graduate Rural 0 0 7 0 1 0 57 0 0 2
## Semiurban 0 1 3 1 3 0 84 2 0 0
## Urban 1 1 5 1 1 0 67 0 0 1
## Not Graduate Rural 0 0 3 0 0 0 26 0 0 0
## Semiurban 0 0 2 0 1 0 19 2 0 0
## Urban 0 0 8 0 1 0 18 1 1 0
3.2 Kuantitatif
3.2.1 Univariat Numerik
3.2.1.1 Summary
NoOutliersNum <- select_if(NoOutliers, is.numeric)
summary(NoOutliersNum)## ApplicantIncome CoapplicantIncome LoanAmount
## Min. : 150 Min. : 0 Min. : 9.0
## 1st Qu.: 2752 1st Qu.: 0 1st Qu.:100.0
## Median : 3598 Median :1260 Median :124.0
## Mean : 4054 Mean :1323 Mean :127.0
## 3rd Qu.: 4891 3rd Qu.:2194 3rd Qu.:151.5
## Max. :10139 Max. :5701 Max. :260.0
3.2.1.2 Variance
sapply(NoOutliersNum, var) #Variance## ApplicantIncome CoapplicantIncome LoanAmount
## 3435005.100 2019826.684 1990.793
3.2.1.3 Standard Deviation
sapply(NoOutliersNum, sd) #Standard Deviation## ApplicantIncome CoapplicantIncome LoanAmount
## 1853.37668 1421.20607 44.61831
3.2.1.4 Median Absolute Deviation
sapply(NoOutliersNum, mad) #Median Absolute Deviation## ApplicantIncome CoapplicantIncome LoanAmount
## 1504.8390 1868.0760 38.5476
3.2.1.5 Inter Quantile Range
sapply(NoOutliersNum, IQR) #Inter Quantile Range## ApplicantIncome CoapplicantIncome LoanAmount
## 2138.5 2194.0 51.5
3.2.1.6 Skewness
sapply(NoOutliersNum, skewness) #skewness## ApplicantIncome CoapplicantIncome LoanAmount
## 1.1358372 0.8411675 0.3989309
3.2.1.7 Kurtosis
sapply(NoOutliersNum, kurtosis) #kurtosis## ApplicantIncome CoapplicantIncome LoanAmount
## 1.19274525 0.01943741 0.47240548
3.2.2 Bivariat Numerik
3.2.2.1 Covariance
cov(NoOutliers$ApplicantIncome,NoOutliers$CoapplicantIncome)## [1] -702662.6
cov(NoOutliers$CoapplicantIncome,NoOutliers$LoanAmount)## [1] 18225.52
cov(NoOutliers$LoanAmount,NoOutliers$ApplicantIncome)## [1] 39323.8
3.2.2.2 Correlation
cor(NoOutliers$ApplicantIncome,NoOutliers$CoapplicantIncome)## [1] -0.2667633
cor(NoOutliers$CoapplicantIncome,NoOutliers$LoanAmount)## [1] 0.2874152
cor(NoOutliers$LoanAmount,NoOutliers$ApplicantIncome)## [1] 0.4755308
3.2.3 Multivariat Numerik
cov(NoOutliersNum)## ApplicantIncome CoapplicantIncome LoanAmount
## ApplicantIncome 3435005.1 -702662.55 39323.796
## CoapplicantIncome -702662.6 2019826.68 18225.520
## LoanAmount 39323.8 18225.52 1990.793
cor(NoOutliersNum)## ApplicantIncome CoapplicantIncome LoanAmount
## ApplicantIncome 1.0000000 -0.2667633 0.4755308
## CoapplicantIncome -0.2667633 1.0000000 0.2874152
## LoanAmount 0.4755308 0.2874152 1.0000000
var(NoOutliersNum)## ApplicantIncome CoapplicantIncome LoanAmount
## ApplicantIncome 3435005.1 -702662.55 39323.796
## CoapplicantIncome -702662.6 2019826.68 18225.520
## LoanAmount 39323.8 18225.52 1990.793
3.3 EDA in Lazy Way
library(funModeling)
library(tidyverse)
library(Hmisc)
library(skimr)
# Run a quick battery of exploratory summaries on a data frame.
#
# data  A data frame to profile.
#
# Each step is called for the output it prints or plots; the value of the
# final describe() call is returned, so it is auto-printed when basic_eda()
# is called at top level.
basic_eda <- function(data) {
  glimpse(data)
  # A value produced in the middle of a function body is never auto-printed,
  # so the skim() summary has to be printed explicitly to appear at all.
  print(skim(data))
  df_status(data)
  freq(data)
  # NOTE(review): confirm profiling_num() prints its own table; if it only
  # returns the data frame, wrap it in print() as well.
  profiling_num(data)
  plot_num(data)
  describe(data)
}
basic_eda(NoOutliers)## Rows: 535
## Columns: 13
## $ ï..Loan_ID <chr> "LP001002", "LP001003", "LP001005", "LP001006", "LP0~
## $ Gender <chr> "Male", "Male", "Male", "Male", "Male", "Male", "Mal~
## $ Married <chr> "No", "Yes", "Yes", "Yes", "No", "Yes", "Yes", "Yes"~
## $ Dependents <chr> "0", "1", "0", "0", "0", "0", "3+", "2", "2", "2", "~
## $ Education <chr> "Graduate", "Graduate", "Graduate", "Not Graduate", ~
## $ Self_Employed <chr> "No", "No", "Yes", "No", "No", "No", "No", "No", "No~
## $ ApplicantIncome <int> 5849, 4583, 3000, 2583, 6000, 2333, 3036, 4006, 3200~
## $ CoapplicantIncome <dbl> 0, 1508, 0, 2358, 0, 1516, 2504, 1526, 700, 1840, 28~
## $ LoanAmount <dbl> 146.4122, 128.0000, 66.0000, 120.0000, 141.0000, 95.~
## $ Loan_Amount_Term <chr> "360", "360", "360", "360", "360", "360", "360", "36~
## $ Credit_History <chr> "1", "1", "1", "1", "1", "1", "0", "1", "1", "1", "1~
## $ Property_Area <chr> "Urban", "Rural", "Urban", "Urban", "Urban", "Urban"~
## $ Loan_Status <chr> "Y", "N", "Y", "Y", "Y", "Y", "N", "Y", "Y", "Y", "N~
## variable q_zeros p_zeros q_na p_na q_inf p_inf type unique
## 1 ï..Loan_ID 0 0.00 0 0 0 0 character 535
## 2 Gender 0 0.00 0 0 0 0 character 2
## 3 Married 0 0.00 0 0 0 0 character 2
## 4 Dependents 321 60.00 0 0 0 0 character 4
## 5 Education 0 0.00 0 0 0 0 character 2
## 6 Self_Employed 0 0.00 0 0 0 0 character 2
## 7 ApplicantIncome 0 0.00 0 0 0 0 integer 438
## 8 CoapplicantIncome 229 42.80 0 0 0 0 numeric 259
## 9 LoanAmount 0 0.00 0 0 0 0 numeric 161
## 10 Loan_Amount_Term 0 0.00 0 0 0 0 character 10
## 11 Credit_History 78 14.58 0 0 0 0 character 2
## 12 Property_Area 0 0.00 0 0 0 0 character 3
## 13 Loan_Status 0 0.00 0 0 0 0 character 2
## ï..Loan_ID frequency percentage cumulative_perc
## 1 LP001002 1 0.19 0.19
## 2 LP001003 1 0.19 0.38
## 3 LP001005 1 0.19 0.57
## 4 LP001006 1 0.19 0.76
## 5 LP001008 1 0.19 0.95
## 6 LP001013 1 0.19 1.14
## 7 LP001014 1 0.19 1.33
## 8 LP001018 1 0.19 1.52
## 9 LP001024 1 0.19 1.71
## 10 LP001027 1 0.19 1.90
## 11 LP001029 1 0.19 2.09
## 12 LP001030 1 0.19 2.28
## 13 LP001032 1 0.19 2.47
## 14 LP001034 1 0.19 2.66
## 15 LP001036 1 0.19 2.85
## 16 LP001038 1 0.19 3.04
## 17 LP001041 1 0.19 3.23
## 18 LP001043 1 0.19 3.42
## 19 LP001047 1 0.19 3.61
## 20 LP001050 1 0.19 3.80
## 21 LP001052 1 0.19 3.99
## 22 LP001066 1 0.19 4.18
## 23 LP001068 1 0.19 4.37
## 24 LP001073 1 0.19 4.56
## 25 LP001086 1 0.19 4.75
## 26 LP001087 1 0.19 4.94
## 27 LP001091 1 0.19 5.13
## 28 LP001095 1 0.19 5.32
## 29 LP001097 1 0.19 5.51
## 30 LP001098 1 0.19 5.70
## 31 LP001106 1 0.19 5.89
## 32 LP001109 1 0.19 6.08
## 33 LP001112 1 0.19 6.27
## 34 LP001116 1 0.19 6.46
## 35 LP001119 1 0.19 6.65
## 36 LP001120 1 0.19 6.84
## 37 LP001123 1 0.19 7.03
## 38 LP001131 1 0.19 7.22
## 39 LP001136 1 0.19 7.41
## 40 LP001137 1 0.19 7.60
## 41 LP001138 1 0.19 7.79
## 42 LP001144 1 0.19 7.98
## 43 LP001146 1 0.19 8.17
## 44 LP001151 1 0.19 8.36
## 45 LP001155 1 0.19 8.55
## 46 LP001157 1 0.19 8.74
## 47 LP001164 1 0.19 8.93
## 48 LP001179 1 0.19 9.12
## 49 LP001194 1 0.19 9.31
## 50 LP001195 1 0.19 9.50
## 51 LP001197 1 0.19 9.69
## 52 LP001198 1 0.19 9.88
## 53 LP001199 1 0.19 10.07
## 54 LP001205 1 0.19 10.26
## 55 LP001206 1 0.19 10.45
## 56 LP001207 1 0.19 10.64
## 57 LP001213 1 0.19 10.83
## 58 LP001222 1 0.19 11.02
## 59 LP001225 1 0.19 11.21
## 60 LP001228 1 0.19 11.40
## 61 LP001238 1 0.19 11.59
## 62 LP001241 1 0.19 11.78
## 63 LP001243 1 0.19 11.97
## 64 LP001245 1 0.19 12.16
## 65 LP001248 1 0.19 12.35
## 66 LP001250 1 0.19 12.54
## 67 LP001253 1 0.19 12.73
## 68 LP001255 1 0.19 12.92
## 69 LP001256 1 0.19 13.11
## 70 LP001259 1 0.19 13.30
## 71 LP001263 1 0.19 13.49
## 72 LP001264 1 0.19 13.68
## 73 LP001265 1 0.19 13.87
## 74 LP001266 1 0.19 14.06
## 75 LP001267 1 0.19 14.25
## 76 LP001275 1 0.19 14.44
## 77 LP001279 1 0.19 14.63
## 78 LP001280 1 0.19 14.82
## 79 LP001282 1 0.19 15.01
## 80 LP001289 1 0.19 15.20
## 81 LP001310 1 0.19 15.39
## 82 LP001316 1 0.19 15.58
## 83 LP001318 1 0.19 15.77
## 84 LP001319 1 0.19 15.96
## 85 LP001322 1 0.19 16.15
## 86 LP001325 1 0.19 16.34
## 87 LP001326 1 0.19 16.53
## 88 LP001327 1 0.19 16.72
## 89 LP001333 1 0.19 16.91
## 90 LP001334 1 0.19 17.10
## 91 LP001343 1 0.19 17.29
## 92 LP001345 1 0.19 17.48
## 93 LP001349 1 0.19 17.67
## 94 LP001356 1 0.19 17.86
## 95 LP001357 1 0.19 18.05
## 96 LP001367 1 0.19 18.24
## 97 LP001370 1 0.19 18.43
## 98 LP001379 1 0.19 18.62
## 99 LP001384 1 0.19 18.81
## 100 LP001385 1 0.19 19.00
## 101 LP001387 1 0.19 19.19
## 102 LP001391 1 0.19 19.38
## 103 LP001392 1 0.19 19.57
## 104 LP001398 1 0.19 19.76
## 105 LP001404 1 0.19 19.95
## 106 LP001405 1 0.19 20.14
## 107 LP001421 1 0.19 20.33
## 108 LP001426 1 0.19 20.52
## 109 LP001430 1 0.19 20.71
## 110 LP001432 1 0.19 20.90
## 111 LP001439 1 0.19 21.09
## 112 LP001443 1 0.19 21.28
## 113 LP001449 1 0.19 21.47
## 114 LP001465 1 0.19 21.66
## 115 LP001473 1 0.19 21.85
## 116 LP001478 1 0.19 22.04
## 117 LP001482 1 0.19 22.23
## 118 LP001487 1 0.19 22.42
## 119 LP001489 1 0.19 22.61
## 120 LP001491 1 0.19 22.80
## 121 LP001493 1 0.19 22.99
## 122 LP001497 1 0.19 23.18
## 123 LP001498 1 0.19 23.37
## 124 LP001504 1 0.19 23.56
## 125 LP001507 1 0.19 23.75
## 126 LP001514 1 0.19 23.94
## 127 LP001518 1 0.19 24.13
## 128 LP001519 1 0.19 24.32
## 129 LP001520 1 0.19 24.51
## 130 LP001528 1 0.19 24.70
## 131 LP001529 1 0.19 24.89
## 132 LP001531 1 0.19 25.08
## 133 LP001532 1 0.19 25.27
## 134 LP001535 1 0.19 25.46
## 135 LP001541 1 0.19 25.65
## 136 LP001543 1 0.19 25.84
## 137 LP001546 1 0.19 26.03
## 138 LP001552 1 0.19 26.22
## 139 LP001560 1 0.19 26.41
## 140 LP001565 1 0.19 26.60
## 141 LP001570 1 0.19 26.79
## 142 LP001572 1 0.19 26.98
## 143 LP001574 1 0.19 27.17
## 144 LP001577 1 0.19 27.36
## 145 LP001578 1 0.19 27.55
## 146 LP001579 1 0.19 27.74
## 147 LP001580 1 0.19 27.93
## 148 LP001581 1 0.19 28.12
## 149 LP001586 1 0.19 28.31
## 150 LP001594 1 0.19 28.50
## 151 LP001603 1 0.19 28.69
## 152 LP001606 1 0.19 28.88
## 153 LP001608 1 0.19 29.07
## 154 LP001616 1 0.19 29.26
## 155 LP001630 1 0.19 29.45
## 156 LP001634 1 0.19 29.64
## 157 LP001636 1 0.19 29.83
## 158 LP001639 1 0.19 30.02
## 159 LP001641 1 0.19 30.21
## 160 LP001643 1 0.19 30.40
## 161 LP001644 1 0.19 30.59
## 162 LP001647 1 0.19 30.78
## 163 LP001653 1 0.19 30.97
## 164 LP001657 1 0.19 31.16
## 165 LP001658 1 0.19 31.35
## 166 LP001664 1 0.19 31.54
## 167 LP001665 1 0.19 31.73
## 168 LP001666 1 0.19 31.92
## 169 LP001669 1 0.19 32.11
## 170 LP001671 1 0.19 32.30
## 171 LP001674 1 0.19 32.49
## 172 LP001677 1 0.19 32.68
## 173 LP001682 1 0.19 32.87
## 174 LP001688 1 0.19 33.06
## 175 LP001691 1 0.19 33.25
## 176 LP001692 1 0.19 33.44
## 177 LP001693 1 0.19 33.63
## 178 LP001698 1 0.19 33.82
## 179 LP001699 1 0.19 34.01
## 180 LP001702 1 0.19 34.20
## 181 LP001708 1 0.19 34.39
## 182 LP001711 1 0.19 34.58
## 183 LP001713 1 0.19 34.77
## 184 LP001715 1 0.19 34.96
## 185 LP001716 1 0.19 35.15
## 186 LP001720 1 0.19 35.34
## 187 LP001722 1 0.19 35.53
## 188 LP001726 1 0.19 35.72
## 189 LP001732 1 0.19 35.91
## 190 LP001734 1 0.19 36.10
## 191 LP001736 1 0.19 36.29
## 192 LP001743 1 0.19 36.48
## 193 LP001744 1 0.19 36.67
## 194 LP001749 1 0.19 36.86
## 195 LP001750 1 0.19 37.05
## 196 LP001751 1 0.19 37.24
## 197 LP001754 1 0.19 37.43
## 198 LP001758 1 0.19 37.62
## 199 LP001760 1 0.19 37.81
## 200 LP001761 1 0.19 38.00
## 201 LP001765 1 0.19 38.19
## 202 LP001768 1 0.19 38.38
## 203 LP001770 1 0.19 38.57
## 204 LP001778 1 0.19 38.76
## 205 LP001784 1 0.19 38.95
## 206 LP001786 1 0.19 39.14
## 207 LP001788 1 0.19 39.33
## 208 LP001790 1 0.19 39.52
## 209 LP001792 1 0.19 39.71
## 210 LP001798 1 0.19 39.90
## 211 LP001800 1 0.19 40.09
## 212 LP001806 1 0.19 40.28
## 213 LP001807 1 0.19 40.47
## 214 LP001811 1 0.19 40.66
## 215 LP001813 1 0.19 40.85
## 216 LP001814 1 0.19 41.04
## 217 LP001819 1 0.19 41.23
## 218 LP001824 1 0.19 41.42
## 219 LP001825 1 0.19 41.61
## 220 LP001835 1 0.19 41.80
## 221 LP001836 1 0.19 41.99
## 222 LP001841 1 0.19 42.18
## 223 LP001846 1 0.19 42.37
## 224 LP001849 1 0.19 42.56
## 225 LP001854 1 0.19 42.75
## 226 LP001864 1 0.19 42.94
## 227 LP001868 1 0.19 43.13
## 228 LP001870 1 0.19 43.32
## 229 LP001871 1 0.19 43.51
## 230 LP001872 1 0.19 43.70
## 231 LP001875 1 0.19 43.89
## 232 LP001877 1 0.19 44.08
## 233 LP001882 1 0.19 44.27
## 234 LP001883 1 0.19 44.46
## 235 LP001884 1 0.19 44.65
## 236 LP001888 1 0.19 44.84
## 237 LP001892 1 0.19 45.03
## 238 LP001894 1 0.19 45.22
## 239 LP001896 1 0.19 45.41
## 240 LP001900 1 0.19 45.60
## 241 LP001903 1 0.19 45.79
## 242 LP001904 1 0.19 45.98
## 243 LP001908 1 0.19 46.17
## 244 LP001910 1 0.19 46.36
## 245 LP001914 1 0.19 46.55
## 246 LP001915 1 0.19 46.74
## 247 LP001917 1 0.19 46.93
## 248 LP001924 1 0.19 47.12
## 249 LP001925 1 0.19 47.31
## 250 LP001926 1 0.19 47.50
## 251 LP001931 1 0.19 47.69
## 252 LP001935 1 0.19 47.88
## 253 LP001936 1 0.19 48.07
## 254 LP001938 1 0.19 48.26
## 255 LP001940 1 0.19 48.45
## 256 LP001945 1 0.19 48.64
## 257 LP001947 1 0.19 48.83
## 258 LP001949 1 0.19 49.02
## 259 LP001953 1 0.19 49.21
## 260 LP001954 1 0.19 49.40
## 261 LP001955 1 0.19 49.59
## 262 LP001963 1 0.19 49.78
## 263 LP001964 1 0.19 49.97
## 264 LP001972 1 0.19 50.16
## 265 LP001974 1 0.19 50.35
## 266 LP001977 1 0.19 50.54
## 267 LP001978 1 0.19 50.73
## 268 LP001990 1 0.19 50.92
## 269 LP001993 1 0.19 51.11
## 270 LP001994 1 0.19 51.30
## 271 LP001998 1 0.19 51.49
## 272 LP002002 1 0.19 51.68
## 273 LP002004 1 0.19 51.87
## 274 LP002006 1 0.19 52.06
## 275 LP002008 1 0.19 52.25
## 276 LP002024 1 0.19 52.44
## 277 LP002031 1 0.19 52.63
## 278 LP002035 1 0.19 52.82
## 279 LP002036 1 0.19 53.01
## 280 LP002043 1 0.19 53.20
## 281 LP002050 1 0.19 53.39
## 282 LP002051 1 0.19 53.58
## 283 LP002053 1 0.19 53.77
## 284 LP002054 1 0.19 53.96
## 285 LP002055 1 0.19 54.15
## 286 LP002068 1 0.19 54.34
## 287 LP002082 1 0.19 54.53
## 288 LP002086 1 0.19 54.72
## 289 LP002087 1 0.19 54.91
## 290 LP002097 1 0.19 55.10
## 291 LP002098 1 0.19 55.29
## 292 LP002100 1 0.19 55.48
## 293 LP002103 1 0.19 55.67
## 294 LP002106 1 0.19 55.86
## 295 LP002110 1 0.19 56.05
## 296 LP002112 1 0.19 56.24
## 297 LP002113 1 0.19 56.43
## 298 LP002114 1 0.19 56.62
## 299 LP002115 1 0.19 56.81
## 300 LP002116 1 0.19 57.00
## 301 LP002119 1 0.19 57.19
## 302 LP002126 1 0.19 57.38
## 303 LP002128 1 0.19 57.57
## 304 LP002129 1 0.19 57.76
## 305 LP002130 1 0.19 57.95
## 306 LP002131 1 0.19 58.14
## 307 LP002137 1 0.19 58.33
## 308 LP002139 1 0.19 58.52
## 309 LP002141 1 0.19 58.71
## 310 LP002142 1 0.19 58.90
## 311 LP002143 1 0.19 59.09
## 312 LP002144 1 0.19 59.28
## 313 LP002149 1 0.19 59.47
## 314 LP002151 1 0.19 59.66
## 315 LP002158 1 0.19 59.85
## 316 LP002160 1 0.19 60.04
## 317 LP002161 1 0.19 60.23
## 318 LP002170 1 0.19 60.42
## 319 LP002175 1 0.19 60.61
## 320 LP002178 1 0.19 60.80
## 321 LP002180 1 0.19 60.99
## 322 LP002181 1 0.19 61.18
## 323 LP002187 1 0.19 61.37
## 324 LP002188 1 0.19 61.56
## 325 LP002190 1 0.19 61.75
## 326 LP002197 1 0.19 61.94
## 327 LP002205 1 0.19 62.13
## 328 LP002209 1 0.19 62.32
## 329 LP002211 1 0.19 62.51
## 330 LP002219 1 0.19 62.70
## 331 LP002223 1 0.19 62.89
## 332 LP002224 1 0.19 63.08
## 333 LP002225 1 0.19 63.27
## 334 LP002226 1 0.19 63.46
## 335 LP002231 1 0.19 63.65
## 336 LP002234 1 0.19 63.84
## 337 LP002236 1 0.19 64.03
## 338 LP002237 1 0.19 64.22
## 339 LP002239 1 0.19 64.41
## 340 LP002243 1 0.19 64.60
## 341 LP002244 1 0.19 64.79
## 342 LP002250 1 0.19 64.98
## 343 LP002255 1 0.19 65.17
## 344 LP002263 1 0.19 65.36
## 345 LP002265 1 0.19 65.55
## 346 LP002266 1 0.19 65.74
## 347 LP002272 1 0.19 65.93
## 348 LP002277 1 0.19 66.12
## 349 LP002281 1 0.19 66.31
## 350 LP002284 1 0.19 66.50
## 351 LP002287 1 0.19 66.69
## 352 LP002288 1 0.19 66.88
## 353 LP002296 1 0.19 67.07
## 354 LP002300 1 0.19 67.26
## 355 LP002301 1 0.19 67.45
## 356 LP002305 1 0.19 67.64
## 357 LP002308 1 0.19 67.83
## 358 LP002314 1 0.19 68.02
## 359 LP002315 1 0.19 68.21
## 360 LP002318 1 0.19 68.40
## 361 LP002319 1 0.19 68.59
## 362 LP002328 1 0.19 68.78
## 363 LP002332 1 0.19 68.97
## 364 LP002335 1 0.19 69.16
## 365 LP002337 1 0.19 69.35
## 366 LP002341 1 0.19 69.54
## 367 LP002345 1 0.19 69.73
## 368 LP002347 1 0.19 69.92
## 369 LP002348 1 0.19 70.11
## 370 LP002357 1 0.19 70.30
## 371 LP002361 1 0.19 70.49
## 372 LP002362 1 0.19 70.68
## 373 LP002366 1 0.19 70.87
## 374 LP002367 1 0.19 71.06
## 375 LP002368 1 0.19 71.25
## 376 LP002369 1 0.19 71.44
## 377 LP002370 1 0.19 71.63
## 378 LP002377 1 0.19 71.82
## 379 LP002379 1 0.19 72.01
## 380 LP002387 1 0.19 72.20
## 381 LP002390 1 0.19 72.39
## 382 LP002393 1 0.19 72.58
## 383 LP002398 1 0.19 72.77
## 384 LP002401 1 0.19 72.96
## 385 LP002407 1 0.19 73.15
## 386 LP002408 1 0.19 73.34
## 387 LP002409 1 0.19 73.53
## 388 LP002418 1 0.19 73.72
## 389 LP002429 1 0.19 73.91
## 390 LP002434 1 0.19 74.10
## 391 LP002435 1 0.19 74.29
## 392 LP002443 1 0.19 74.48
## 393 LP002444 1 0.19 74.67
## 394 LP002446 1 0.19 74.86
## 395 LP002447 1 0.19 75.05
## 396 LP002448 1 0.19 75.24
## 397 LP002449 1 0.19 75.43
## 398 LP002453 1 0.19 75.62
## 399 LP002455 1 0.19 75.81
## 400 LP002459 1 0.19 76.00
## 401 LP002467 1 0.19 76.19
## 402 LP002472 1 0.19 76.38
## 403 LP002473 1 0.19 76.57
## 404 LP002478 1 0.19 76.76
## 405 LP002484 1 0.19 76.95
## 406 LP002487 1 0.19 77.14
## 407 LP002489 1 0.19 77.33
## 408 LP002493 1 0.19 77.52
## 409 LP002494 1 0.19 77.71
## 410 LP002500 1 0.19 77.90
## 411 LP002502 1 0.19 78.09
## 412 LP002505 1 0.19 78.28
## 413 LP002515 1 0.19 78.47
## 414 LP002517 1 0.19 78.66
## 415 LP002519 1 0.19 78.85
## 416 LP002522 1 0.19 79.04
## 417 LP002524 1 0.19 79.23
## 418 LP002529 1 0.19 79.42
## 419 LP002530 1 0.19 79.61
## 420 LP002533 1 0.19 79.80
## 421 LP002534 1 0.19 79.99
## 422 LP002536 1 0.19 80.18
## 423 LP002537 1 0.19 80.37
## 424 LP002543 1 0.19 80.56
## 425 LP002544 1 0.19 80.75
## 426 LP002545 1 0.19 80.94
## 427 LP002555 1 0.19 81.13
## 428 LP002556 1 0.19 81.32
## 429 LP002560 1 0.19 81.51
## 430 LP002562 1 0.19 81.70
## 431 LP002571 1 0.19 81.89
## 432 LP002585 1 0.19 82.08
## 433 LP002586 1 0.19 82.27
## 434 LP002587 1 0.19 82.46
## 435 LP002588 1 0.19 82.65
## 436 LP002600 1 0.19 82.84
## 437 LP002602 1 0.19 83.03
## 438 LP002603 1 0.19 83.22
## 439 LP002606 1 0.19 83.41
## 440 LP002615 1 0.19 83.60
## 441 LP002618 1 0.19 83.79
## 442 LP002619 1 0.19 83.98
## 443 LP002622 1 0.19 84.17
## 444 LP002625 1 0.19 84.36
## 445 LP002626 1 0.19 84.55
## 446 LP002637 1 0.19 84.74
## 447 LP002640 1 0.19 84.93
## 448 LP002643 1 0.19 85.12
## 449 LP002659 1 0.19 85.31
## 450 LP002670 1 0.19 85.50
## 451 LP002682 1 0.19 85.69
## 452 LP002683 1 0.19 85.88
## 453 LP002684 1 0.19 86.07
## 454 LP002689 1 0.19 86.26
## 455 LP002690 1 0.19 86.45
## 456 LP002692 1 0.19 86.64
## 457 LP002697 1 0.19 86.83
## 458 LP002705 1 0.19 87.02
## 459 LP002706 1 0.19 87.21
## 460 LP002714 1 0.19 87.40
## 461 LP002716 1 0.19 87.59
## 462 LP002717 1 0.19 87.78
## 463 LP002720 1 0.19 87.97
## 464 LP002723 1 0.19 88.16
## 465 LP002732 1 0.19 88.35
## 466 LP002738 1 0.19 88.54
## 467 LP002739 1 0.19 88.73
## 468 LP002740 1 0.19 88.92
## 469 LP002741 1 0.19 89.11
## 470 LP002743 1 0.19 89.30
## 471 LP002753 1 0.19 89.49
## 472 LP002755 1 0.19 89.68
## 473 LP002757 1 0.19 89.87
## 474 LP002767 1 0.19 90.06
## 475 LP002768 1 0.19 90.25
## 476 LP002772 1 0.19 90.44
## 477 LP002776 1 0.19 90.63
## 478 LP002777 1 0.19 90.82
## 479 LP002778 1 0.19 91.01
## 480 LP002784 1 0.19 91.20
## 481 LP002785 1 0.19 91.39
## 482 LP002788 1 0.19 91.58
## 483 LP002789 1 0.19 91.77
## 484 LP002792 1 0.19 91.96
## 485 LP002794 1 0.19 92.15
## 486 LP002795 1 0.19 92.34
## 487 LP002798 1 0.19 92.53
## 488 LP002804 1 0.19 92.72
## 489 LP002807 1 0.19 92.91
## 490 LP002820 1 0.19 93.10
## 491 LP002821 1 0.19 93.29
## 492 LP002832 1 0.19 93.48
## 493 LP002833 1 0.19 93.67
## 494 LP002836 1 0.19 93.86
## 495 LP002837 1 0.19 94.05
## 496 LP002840 1 0.19 94.24
## 497 LP002841 1 0.19 94.43
## 498 LP002842 1 0.19 94.62
## 499 LP002847 1 0.19 94.81
## 500 LP002862 1 0.19 95.00
## 501 LP002863 1 0.19 95.19
## 502 LP002868 1 0.19 95.38
## 503 LP002872 1 0.19 95.57
## 504 LP002874 1 0.19 95.76
## 505 LP002877 1 0.19 95.95
## 506 LP002888 1 0.19 96.14
## 507 LP002892 1 0.19 96.33
## 508 LP002894 1 0.19 96.52
## 509 LP002898 1 0.19 96.71
## 510 LP002911 1 0.19 96.90
## 511 LP002912 1 0.19 97.09
## 512 LP002916 1 0.19 97.28
## 513 LP002917 1 0.19 97.47
## 514 LP002925 1 0.19 97.66
## 515 LP002926 1 0.19 97.85
## 516 LP002928 1 0.19 98.04
## 517 LP002931 1 0.19 98.23
## 518 LP002936 1 0.19 98.42
## 519 LP002940 1 0.19 98.61
## 520 LP002941 1 0.19 98.80
## 521 LP002943 1 0.19 98.99
## 522 LP002945 1 0.19 99.18
## 523 LP002948 1 0.19 99.37
## 524 LP002950 1 0.19 99.56
## 525 LP002953 1 0.19 99.75
## 526 LP002958 1 0.19 99.94
## 527 LP002960 1 0.19 100.13
## 528 LP002961 1 0.19 100.32
## 529 LP002964 1 0.19 100.51
## 530 LP002974 1 0.19 100.70
## 531 LP002978 1 0.19 100.89
## 532 LP002979 1 0.19 101.08
## 533 LP002983 1 0.19 101.27
## 534 LP002984 1 0.19 101.46
## 535 LP002990 1 0.19 100.00
## Gender frequency percentage cumulative_perc
## 1 Male 435 81.31 81.31
## 2 Female 100 18.69 100.00
## Married frequency percentage cumulative_perc
## 1 Yes 348 65.05 65.05
## 2 No 187 34.95 100.00
## Dependents frequency percentage cumulative_perc
## 1 0 321 60.00 60.00
## 2 2 90 16.82 76.82
## 3 1 84 15.70 92.52
## 4 3+ 40 7.48 100.00
## Education frequency percentage cumulative_perc
## 1 Graduate 404 75.51 75.51
## 2 Not Graduate 131 24.49 100.00
## Self_Employed frequency percentage cumulative_perc
## 1 No 474 88.6 88.6
## 2 Yes 61 11.4 100.0
## Loan_Amount_Term frequency percentage cumulative_perc
## 1 360 459 85.79 85.79
## 2 180 36 6.73 92.52
## 3 480 14 2.62 95.14
## 4 300 10 1.87 97.01
## 5 240 4 0.75 97.76
## 6 84 4 0.75 98.51
## 7 120 3 0.56 99.07
## 8 36 2 0.37 99.44
## 9 60 2 0.37 99.81
## 10 12 1 0.19 100.00
## Credit_History frequency percentage cumulative_perc
## 1 1 457 85.42 85.42
## 2 0 78 14.58 100.00
## Property_Area frequency percentage cumulative_perc
## 1 Semiurban 205 38.32 38.32
## 2 Urban 170 31.78 70.10
## 3 Rural 160 29.91 100.00
## Loan_Status frequency percentage cumulative_perc
## 1 Y 372 69.53 69.53
## 2 N 163 30.47 100.00
## data
##
## 13 Variables 535 Observations
## --------------------------------------------------------------------------------
## ï..Loan_ID
## n missing distinct
## 535 0 535
##
## lowest : LP001002 LP001003 LP001005 LP001006 LP001008
## highest: LP002978 LP002979 LP002983 LP002984 LP002990
## --------------------------------------------------------------------------------
## Gender
## n missing distinct
## 535 0 2
##
## Value Female Male
## Frequency 100 435
## Proportion 0.187 0.813
## --------------------------------------------------------------------------------
## Married
## n missing distinct
## 535 0 2
##
## Value No Yes
## Frequency 187 348
## Proportion 0.35 0.65
## --------------------------------------------------------------------------------
## Dependents
## n missing distinct
## 535 0 4
##
## Value 0 1 2 3+
## Frequency 321 84 90 40
## Proportion 0.600 0.157 0.168 0.075
## --------------------------------------------------------------------------------
## Education
## n missing distinct
## 535 0 2
##
## Value Graduate Not Graduate
## Frequency 404 131
## Proportion 0.755 0.245
## --------------------------------------------------------------------------------
## Self_Employed
## n missing distinct
## 535 0 2
##
## Value No Yes
## Frequency 474 61
## Proportion 0.886 0.114
## --------------------------------------------------------------------------------
## ApplicantIncome
## n missing distinct Info Mean Gmd .05 .10
## 535 0 438 1 4054 1986 1878 2200
## .25 .50 .75 .90 .95
## 2752 3598 4891 6467 8022
##
## lowest : 150 210 645 674 1000, highest: 9833 9963 10000 10047 10139
## --------------------------------------------------------------------------------
## CoapplicantIncome
## n missing distinct Info Mean Gmd .05 .10
## 535 0 259 0.922 1323 1528 0 0
## .25 .50 .75 .90 .95
## 0 1260 2194 3258 4130
##
## lowest : 0.00 16.12 189.00 240.00 242.00
## highest: 5500.00 5624.00 5625.00 5654.00 5701.00
## --------------------------------------------------------------------------------
## LoanAmount
## n missing distinct Info Mean Gmd .05 .10
## 535 0 161 1 127 49.51 55.7 71.0
## .25 .50 .75 .90 .95
## 100.0 124.0 151.5 185.6 205.6
##
## lowest : 9 17 25 26 30, highest: 253 255 258 259 260
## --------------------------------------------------------------------------------
## Loan_Amount_Term
## n missing distinct
## 535 0 10
##
## lowest : 12 120 180 240 300, highest: 36 360 480 60 84
##
## Value 12 120 180 240 300 36 360 480 60 84
## Frequency 1 3 36 4 10 2 459 14 2 4
## Proportion 0.002 0.006 0.067 0.007 0.019 0.004 0.858 0.026 0.004 0.007
## --------------------------------------------------------------------------------
## Credit_History
## n missing distinct
## 535 0 2
##
## Value 0 1
## Frequency 78 457
## Proportion 0.146 0.854
## --------------------------------------------------------------------------------
## Property_Area
## n missing distinct
## 535 0 3
##
## Value Rural Semiurban Urban
## Frequency 160 205 170
## Proportion 0.299 0.383 0.318
## --------------------------------------------------------------------------------
## Loan_Status
## n missing distinct
## 535 0 2
##
## Value N Y
## Frequency 163 372
## Proportion 0.305 0.695
## --------------------------------------------------------------------------------
4 Tugas 4
Lakukan pemeriksaan distribusi densitas menggunakan R pada setiap variabel kuantitatif dengan beberapa bagian sebagai berikut:
4.1 Univariat numerik
# Univariate kernel density estimates (ks::kde), one per column of
# NoOutliersNum. The axis labels below identify column 3 as Loan Amount,
# column 2 as Coapplicant Income and column 1 as Applicant Income.
# Each statement sits on its own line so the chunk parses as R.

# Loan Amount
fhat <- kde(x = NoOutliersNum[, 3])
plot_1 <- plot(
  fhat,
  cont = 50, col.cont = 4, cont.lwd = 2,
  xlab = "Loan Amount", drawpoints = TRUE
)

# Coapplicant Income
fhat1 <- kde(x = NoOutliersNum[, 2])
plot_2 <- plot(
  fhat1,
  cont = 50, col.cont = 4, cont.lwd = 2,
  xlab = "Coapplicant Income", drawpoints = TRUE
)

# Applicant Income
fhat3 <- kde(x = NoOutliersNum[, 1])
plot_3 <- plot(
  fhat3,
  cont = 50, col.cont = 4, cont.lwd = 2,
  xlab = "Applicant Income", drawpoints = TRUE
)
4.2 Bivariat numerik
# Bivariate kernel density estimate on columns 2 and 3 of NoOutliersNum,
# drawn twice from the same fit: first as filled contours at the 10%, 20%,
# ..., 90% levels, then as a perspective surface.
fhat4 <- kde(x = NoOutliersNum[, 2:3])
plot(fhat4, display = "filled.contour", cont = seq(10, 90, by = 10), lwd = 1)
plot(fhat4, display = "persp", border = 1)
4.3 Multivariat numerik
# Trivariate kernel density estimate on columns 1 to 3 of NoOutliersNum,
# plotted with the default display for a kde object.
fhat5 <- kde(x = NoOutliersNum[, 1:3])
plot(fhat5)
5 Tugas 5
Lakukan proses pengujian Hipotesis menggunakan R pada setiap variabel kuantitatif dengan beberapa bagian sebagai berikut:
5.1 Hitunglah margin of error dan estimasi interval untuk proporsi peminjam berjenis kelamin perempuan pada tingkat kepercayaan 95%.
library(MASS) # attached here; none of the statements below call into MASS

# 95% confidence interval for the proportion of female borrowers.
k <- sum(NoOutliers$Gender == "Female") # number of female borrowers
n <- nrow(NoOutliers) # total number of observations
pbar <- k / n # sample proportion of female borrowers
SE <- sqrt(pbar * (1 - pbar) / n) # standard error of the proportion
SE
## [1] 0.01685443
E <- qnorm(0.975) * SE # margin of error at the 95% confidence level
E
## [1] 0.03303407
pbar + c(-E, E) # lower and upper bounds of the confidence interval
## [1] 0.1538818 0.2199500
5.2 Jika Anda berencana menggunakan perkiraan proporsi 50% data konsumen berjenis kelamin perempuan, temukan ukuran sampel yang diperlukan untuk mencapai margin kesalahan 5% untuk data observasi pada tingkat kepercayaan 95%.
# Sample size needed to reach a 5% margin of error at the 95% confidence
# level when the planned proportion estimate is 50%.
zstar <- qnorm(0.975) # standard-normal quantile for 95% confidence
p <- 0.5 # planned proportion estimate (50%)
E <- 0.05 # target margin of error
zstar^2 * p * (1 - p) / E^2 # required sample size
## [1] 384.1459
5.3 Lakukan pembuktian kebenaran asumsi dengan tingkat signifikansi 0.05, jika Bank mengklaim bahwa pinjaman rata-rata konsumen adalah:
# Draw the 30-row sample of NoOutliers used by the hypothesis tests below.
set.seed(100) # fixed seed so the same rows are drawn on every run
DataSample <- sample_n(NoOutliers, 30)
DataSample
5.3.1 Lebih besar $ 150.
\[H_0 : \mu \leq \$150 \\ \] \[H_1 : \mu > \$150 \]
# Right-tailed one-sample t-test of H0: mu <= 150 against H1: mu > 150.
mu0 <- 150 # hypothesized mean
xbar <- mean(DataSample$LoanAmount) # sample mean
s <- sd(DataSample$LoanAmount) # sample standard deviation
n <- nrow(DataSample) # sample size
t <- (xbar - mu0) / (s / sqrt(n)) # test statistic
t
## [1] -2.520249
alpha <- 0.05 # significance level
t.alpha <- qt(1 - alpha, df = n - 1) # right-tail critical value
t.alpha
## [1] 1.699127
Karena \(t_{hitung}<t_{table}\), maka terima \(H_0\). Sehingga dapat kita simpulkan bahwa rata-rata pinjaman konsumen lebih kecil sama dengan $150 dengan tingkat signifikansi 5%.
5.3.2 Lebih kecil $150
\[H_0 : \mu \geq \$150 \\ \] \[H_1 : \mu < \$150 \]
# Left-tailed one-sample t-test of H0: mu >= 150 against H1: mu < 150.
mu0 <- 150 # hypothesized mean
xbar <- mean(DataSample$LoanAmount) # sample mean
s <- sd(DataSample$LoanAmount) # sample standard deviation
n <- nrow(DataSample) # sample size
t <- (xbar - mu0) / (s / sqrt(n)) # test statistic
t
## [1] -2.520249
alpha <- 0.05 # significance level
t.alpha <- qt(1 - alpha, df = n - 1) # magnitude of the one-tail critical value
-t.alpha # left-tail critical value
## [1] -1.699127
Karena \(t_{hitung} < -t_{table}\), maka tolak \(H_0\). Sehingga dapat kita simpulkan bahwa rata-rata pinjaman konsumen lebih kecil dari $150 dengan tingkat signifikansi 5%.
5.3.3 Sama dengan $ 150.
\[H_0 : \mu = \$150 \\ \] \[H_1 : \mu \neq \$150 \]
# Two-tailed one-sample t-test of H0: mu = 150 against H1: mu != 150.
mu0 <- 150 # hypothesized mean
xbar <- mean(DataSample$LoanAmount) # sample mean
s <- sd(DataSample$LoanAmount) # sample standard deviation
n <- nrow(DataSample) # sample size
t <- (xbar - mu0) / (s / sqrt(n)) # test statistic
t
## [1] -2.520249
alpha <- 0.05 # total significance level across both tails
# A two-tailed test places alpha / 2 in each tail, so the critical value is
# the 1 - alpha / 2 quantile of the t distribution, not the 1 - alpha quantile.
t.alpha <- qt(1 - alpha / 2, df = n - 1) # upper critical value
t.alpha
## [1] 2.04523
-t.alpha # lower critical value
## [1] -2.04523
Karena \(t_{hitung}\) berada diluar \(interval~t_{table}\), maka tolak \(H_0\). Sehingga dapat kita simpulkan bahwa rata-rata pinjaman konsumen tidak sama dengan $150 dengan tingkat signifikansi 5%.
5.4 Lakukan pembuktian kebenaran asumsi dengan tingkat signifikansi 0.05 seperti di atas, jika diketahui simpangan baku pinjaman adalah $ 85.
5.4.1 Lebih besar $ 150.
\[H_0 : \mu \leq \$150 \\ \] \[H_1 : \mu > \$150 \]
# Right-tailed test of H0: mu <= 150 against H1: mu > 150 with a known
# standard deviation. Because the standard deviation is given rather than
# estimated from the sample, the statistic is compared against a
# standard-normal quantile (qnorm) instead of a t quantile. The names `t` and
# `t.alpha` are kept so they match the symbols used in the surrounding text.
mu0 <- 150 # hypothesized mean
xbar <- mean(DataSample$LoanAmount) # sample mean
s <- 85 # known (given) standard deviation
n <- nrow(DataSample) # sample size
t <- (xbar - mu0) / (s / sqrt(n)) # test statistic
t
## [1] -1.546511
alpha <- 0.05 # significance level
t.alpha <- qnorm(1 - alpha) # right-tail critical value
t.alpha
## [1] 1.644854
Karena \(t_{hitung}<t_{table}\), maka terima \(H_0\). Sehingga dapat kita simpulkan bahwa rata-rata pinjaman konsumen lebih kecil sama dengan $150 dengan tingkat signifikansi 5%.
5.4.2 Lebih kecil $150
\[H_0 : \mu \geq \$150 \\ \] \[H_1 : \mu < \$150 \]
# Left-tailed test of H0: mu >= 150 against H1: mu < 150 with a known
# standard deviation. Because the standard deviation is given rather than
# estimated from the sample, the statistic is compared against a
# standard-normal quantile (qnorm) instead of a t quantile. The names `t` and
# `t.alpha` are kept so they match the symbols used in the surrounding text.
mu0 <- 150 # hypothesized mean
xbar <- mean(DataSample$LoanAmount) # sample mean
s <- 85 # known (given) standard deviation
n <- nrow(DataSample) # sample size
t <- (xbar - mu0) / (s / sqrt(n)) # test statistic
t
## [1] -1.546511
alpha <- 0.05 # significance level
t.alpha <- qnorm(1 - alpha) # magnitude of the one-tail critical value
-t.alpha # left-tail critical value
## [1] -1.644854
Karena \(t_{hitung} > -t_{table}\), maka terima \(H_0\). Sehingga dapat kita simpulkan bahwa rata-rata pinjaman konsumen lebih besar sama dengan $150 dengan tingkat signifikansi 5%.
5.4.3 Sama dengan $ 150.
\[H_0 : \mu = \$150 \\ \] \[H_1 : \mu \neq \$150 \]
# Two-tailed test of H0: mu = 150 against H1: mu != 150 with a known
# standard deviation. Because the standard deviation is given rather than
# estimated from the sample, the statistic is compared against
# standard-normal quantiles (qnorm) instead of t quantiles. The names `t` and
# `t.alpha` are kept so they match the symbols used in the surrounding text.
mu0 <- 150 # hypothesized mean
xbar <- mean(DataSample$LoanAmount) # sample mean
s <- 85 # known (given) standard deviation
n <- nrow(DataSample) # sample size
t <- (xbar - mu0) / (s / sqrt(n)) # test statistic
t
## [1] -1.546511
alpha <- 0.05 # total significance level across both tails
# A two-tailed test places alpha / 2 in each tail, so the critical value is
# the 1 - alpha / 2 quantile, not the 1 - alpha quantile.
t.alpha <- qnorm(1 - alpha / 2) # upper critical value
t.alpha
## [1] 1.959964
-t.alpha # lower critical value
## [1] -1.959964
Karena \(t_{hitung}\) berada didalam \(interval~t_{table}\), maka terima \(H_0\). Sehingga dapat kita simpulkan bahwa rata-rata pinjaman konsumen sama dengan $150 dengan tingkat signifikansi 5%.
6 Referensi
- https://bookdown.org/BaktiSiregar/data-science-for-beginners-part-2
- https://bookdown.org/BaktiSiregar/data-science-for-beginners/
- https://seaborn.pydata.org/generated/seaborn.jointplot.html
- https://www.python-graph-gallery.com/
- https://stackoverflow.com/
- https://rpubs.com/dsciencelabs/ks3
- https://rpubs.com/dsciencelabs/ks4