Computational Statistics

Mid Term - Week 8

Email           :
RPubs          : https://rpubs.com/invokerarts/
Github         : https://github.com/invokerarts
Majors         : Business Statistics
Address      : ARA Center, Matana University Tower Jl. CBD Barat Kav, RT.1, Curug Sangereng,
                      Kelapa Dua, Tangerang, Banten 15810.



library(dplyr)
library(quantable)
library(naniar)
library(ggplot2)
library(e1071)
library(ggpubr)
library(ks)

1 Tugas 1

1.1 Import Data

# Read the loan training data from a CSV file given by a relative path.
DataUTS <- read.csv(file = "loan-train.csv", sep = ",")
# Show the imported data frame.
DataUTS

1.2 Penanganan Data Hilang

# Names of the columns that contain at least one empty string ("").
ColNV <- names(DataUTS)[colSums(DataUTS == "", na.rm = TRUE) > 0]
ColNV
## [1] "Gender"        "Married"       "Dependents"    "Self_Employed"
# Turn the empty strings in those four columns into explicit NA values.
DataUTSwithNA <- DataUTS %>%
  mutate(across(
    c(Gender, Married, Dependents, Self_Employed),
    ~ replace(.x, .x == "", NA)
  ))


# Names of the columns that hold at least one NA, checked column by column.
ColNA <- names(DataUTSwithNA)[vapply(DataUTSwithNA, anyNA, logical(1))]
ColNA
## [1] "Gender"           "Married"          "Dependents"       "Self_Employed"   
## [5] "LoanAmount"       "Loan_Amount_Term" "Credit_History"
# Return the most frequent value(s) of a vector.
#
# x:     an atomic vector.
# na.rm: if TRUE, NA entries are dropped before counting. The default (FALSE)
#        counts NA like any other value.
#
# Returns every value tied for the highest count, in order of first
# appearance, so the result may have length > 1. An empty input (or one that
# is empty after NA removal) gives an empty vector of the same type instead
# of a spurious NA.
modes <- function(x, na.rm = FALSE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  if (length(x) == 0L) {
    return(x)
  }
  ux <- unique(x)
  # One bin per distinct value, so `tab` lines up with `ux`.
  tab <- tabulate(match(x, ux), nbins = length(ux))
  ux[tab == max(tab)]
}

# Impute missing values: LoanAmount gets its mean, every categorical column
# gets its mode. Loan_Amount_Term and Credit_History are converted to
# character afterwards so they are treated as categories from here on.
#
# The mode is always taken over the non-missing values and only the first
# one is used, so a tie (or NA being the most frequent entry) can never put
# several values or NA into the replacement.
NoNAData <- DataUTSwithNA %>%
  mutate(
    LoanAmount = ifelse(
      is.na(LoanAmount),
      mean(LoanAmount, na.rm = TRUE),
      LoanAmount
    ),
    Loan_Amount_Term = as.character(ifelse(
      is.na(Loan_Amount_Term),
      modes(na.omit(Loan_Amount_Term))[1],
      Loan_Amount_Term
    )),
    Credit_History = as.character(ifelse(
      is.na(Credit_History),
      modes(na.omit(Credit_History))[1],
      Credit_History
    )),
    Gender = replace(Gender, is.na(Gender), modes(na.omit(Gender))[1]),
    Married = replace(Married, is.na(Married), modes(na.omit(Married))[1]),
    Dependents = replace(
      Dependents, is.na(Dependents), modes(na.omit(Dependents))[1]
    ),
    Self_Employed = replace(
      Self_Employed, is.na(Self_Employed), modes(na.omit(Self_Employed))[1]
    )
  )
NoNAData

1.3 Periksa Data Duplikat

# Total row count next to the number of distinct rows; equal counts mean no
# record is fully duplicated.
data.frame(
  Jumlah_Data = nrow(NoNAData),
  Jumlah_Data_Unik = nrow(distinct(NoNAData))
)

1.4 Pemisahan Data Kategori dan Numerik

# Split the imputed data by column type: character columns are the
# categorical variables, numeric columns the quantitative ones.
DataKat <- NoNAData %>% select_if(is.character)
DataNum <- NoNAData %>% select_if(is.numeric)

DataKat
DataNum

1.5 Penanganan Data Numerik

1.5.1 Standardisasi

# Standardise a numeric vector to z-scores: subtract the mean and divide by
# the sample standard deviation, giving mean 0 and standard deviation 1.
#
# x: numeric vector without missing values.
# Returns a numeric vector of the same length. A constant vector has zero
# standard deviation, so the result is NaN in that case.
#
# This used to be a copy of the min-max formula in `normalize`, which is a
# different rescaling.
Standarize <- function(x) {
  (x - mean(x)) / sd(x)
}

# Apply base R's scale() to every numeric column and collect the results in
# a data frame.
DataStandarisasi <- DataNum %>%
  lapply(scale) %>%
  as.data.frame()
DataStandarisasi

1.5.2 Normalisasi

# Min-max rescaling: map the smallest value of x to 0 and the largest to 1.
#
# x: numeric vector.
# Returns a numeric vector of the same length.
normalize <- function(x) {
  bounds <- range(x)
  span <- bounds[2] - bounds[1]
  (x - bounds[1]) / span
}

# Min-max rescale every numeric column.
DataNormalisasi <- DataNum %>%
  lapply(normalize) %>%
  as.data.frame()
DataNormalisasi

1.5.3 Penskalaan Robust

# Robust scaling: centre on the median and divide by the interquartile range,
# so extreme values influence the result less than with mean/sd scaling.
#
# x:     numeric vector.
# na.rm: if TRUE, missing values are ignored when computing the median and
#        the IQR (they stay NA in the result). The default (FALSE) keeps the
#        previous behaviour, where a missing value makes the quantile
#        computation fail.
# Returns an unnamed numeric vector of the same length as x.
robust_scale <- function(x, na.rm = FALSE) {
  (x - median(x, na.rm = na.rm)) / IQR(x, na.rm = na.rm)
}

# Apply median/IQR scaling to every numeric column.
DataRobustScale <- DataNum %>%
  lapply(robust_scale) %>%
  as.data.frame()
DataRobustScale

1.6 Penanganan Data Pencilan

# Flag outliers with the 1.5 * IQR rule.
#
# x: numeric vector without missing values.
# Returns a logical vector of the same length: TRUE where a value lies more
# than 1.5 interquartile ranges above the third or below the first quartile.
#
# (The function used to be defined twice with the same body; one definition
# is kept, and the local that shadowed stats::IQR is renamed.)
outliers <- function(x) {
  quartiles <- quantile(x, probs = c(0.25, 0.75), names = FALSE)
  spread <- quartiles[2] - quartiles[1]

  upper_limit <- quartiles[2] + 1.5 * spread
  lower_limit <- quartiles[1] - 1.5 * spread

  x > upper_limit | x < lower_limit
}

# Drop every row that is an outlier in at least one of the given columns.
#
# df:   data frame.
# cols: names of the numeric columns to check (default: all columns).
# Returns a data frame with the flagged rows removed.
#
# All limits are computed on the original data and the rows are removed in a
# single step, so the result does not depend on the order of `cols`.
# `drop = FALSE` keeps a one-column input a data frame.
remove_outliers <- function(df, cols = names(df)) {
  is_outlier <- rep(FALSE, nrow(df))
  for (col in cols) {
    is_outlier <- is_outlier | outliers(df[[col]])
  }
  df[!is_outlier, , drop = FALSE]
}

# Rows of the numeric data that are outliers in one column each. Note the
# abbreviations: Oco holds the LoanAmount outliers, Oap the ApplicantIncome
# outliers and Ola the CoapplicantIncome outliers.
Oco <- subset(DataNum, outliers(DataNum$LoanAmount))
Oap <- subset(DataNum, outliers(DataNum$ApplicantIncome))
Ola <- subset(DataNum, outliers(DataNum$CoapplicantIncome))

# Stack the three sets; a row flagged in several columns appears repeatedly.
AllOutliers <- rbind(Oco, Oap, Ola)
data.frame(
  Jumlah_Outliers = nrow(AllOutliers),
  Jumlah_Outliers_Unik = nrow(distinct(AllOutliers))
)
UniqOurliers <- distinct(AllOutliers)

# Remove every record whose numeric values match an outlier row. The join
# keys are stated explicitly (all columns of the outlier table) rather than
# left to the implicit natural join.
NoOutliers <- NoNAData %>%
  anti_join(UniqOurliers, by = names(UniqOurliers))
NoOutliers

1.7 Penanganan Data Kategorikal

1.7.1 Mengubah label

# Number of distinct values per categorical column (determines how many
# labels each factor below needs).
DataKat %>% summarise_all(n_distinct)

# Recode each categorical column to integer-like labels. factor() orders the
# levels of a character vector as sorted text, so the labels are assigned in
# that order; for Loan_Amount_Term this is text order ("300" before "36"),
# not numeric order.
GenderLabel <- factor(DataKat$Gender, labels = c(0, 1))
# 0=Female 1=Male
MarriedLabel <- factor(DataKat$Married, labels = c(0, 1))
# 0=No 1=Yes
DependentsLabel <- factor(DataKat$Dependents, labels = c(0, 1, 2, 3))
# 0=0 1=1 2=2 3=3+
EducationLabel <- factor(DataKat$Education, labels = c(0, 1))
# 0=Graduate 1=Not Graduate
Self_EmployedLabel <- factor(DataKat$Self_Employed, labels = c(0, 1))
# 0=No 1=Yes
Loan_Amount_TermLabel <- factor(
  DataKat$Loan_Amount_Term,
  labels = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)
)
# 0=12 1=120 2=180 3=240 4=300 5=36 6=360 7=480 8=60 9=84
Credit_HistoryLabel <- factor(DataKat$Credit_History, labels = c(0, 1))
# 0="0" 1="1"
Property_AreaLabel <- factor(DataKat$Property_Area, labels = c(0, 1, 2))
# 0=Rural 1=Semiurban 2=Urban
Loan_StatusLabel <- factor(DataKat$Loan_Status, labels = c(0, 1))
# 0=N 1=Y

# The loan ID is the first character column. It is taken by position because
# its name was mangled by a byte-order mark on import ("ï..Loan_ID"), and
# that name depends on platform and encoding.
DataKatLabeled <- data.frame(
  ID = DataKat[[1]],
  GenderLabel,
  MarriedLabel,
  DependentsLabel,
  EducationLabel,
  Self_EmployedLabel,
  Loan_Amount_TermLabel,
  Credit_HistoryLabel,
  Property_AreaLabel,
  Loan_StatusLabel
)

DataKatLabeled

2 Tugas 2

Lakukan Proses Visualisasi Data dengan menggunakan R dengan beberapa langkah berikut:

2.1 Visualisasi Univariabel

2.1.1 Data Kategorikal

library(patchwork)

# One bar chart of counts per categorical column of the outlier-free data.
plot1_G <- ggplot(data = NoOutliers, mapping = aes(x = Gender)) +
  geom_bar()
plot1_M <- ggplot(data = NoOutliers, mapping = aes(x = Married)) +
  geom_bar()
plot1_D <- ggplot(data = NoOutliers, mapping = aes(x = Dependents)) +
  geom_bar()
plot1_E <- ggplot(data = NoOutliers, mapping = aes(x = Education)) +
  geom_bar()
plot1_SE <- ggplot(data = NoOutliers, mapping = aes(x = Self_Employed)) +
  geom_bar()
# The loan-term labels are rotated so the ten categories stay readable.
plot1_LAT <- ggplot(data = NoOutliers, mapping = aes(x = Loan_Amount_Term)) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 0.5))
plot1_CH <- ggplot(data = NoOutliers, mapping = aes(x = Credit_History)) +
  geom_bar()
plot1_PA <- ggplot(data = NoOutliers, mapping = aes(x = Property_Area)) +
  geom_bar()
plot1_LS <- ggplot(data = NoOutliers, mapping = aes(x = Loan_Status)) +
  geom_bar()

# Arrange all nine charts on one page.
ggarrange(
  plot1_G, plot1_M, plot1_D, plot1_E, plot1_SE,
  plot1_LAT, plot1_CH, plot1_PA, plot1_LS
)

2.1.2 Data Numerikal

# Twelve-bin histograms of the three numeric columns.
plot1_AI <- ggplot(data = NoOutliers, mapping = aes(x = ApplicantIncome)) +
  geom_histogram(bins = 12, colour = "white")
plot1_CI <- ggplot(data = NoOutliers, mapping = aes(x = CoapplicantIncome)) +
  geom_histogram(bins = 12, colour = "white")
plot1_LA <- ggplot(data = NoOutliers, mapping = aes(x = LoanAmount)) +
  geom_histogram(bins = 12, colour = "white")

ggarrange(plot1_AI, plot1_CI, plot1_LA)

2.2 Visualisasi Bivariabel

2.2.1 Kategorikal vs Kategorikal

# Side-by-side bar charts for pairs of categorical variables. With
# preserve = "single" every bar keeps the same width even when a
# combination has no observations.
plot2_G_M <- ggplot(data = NoOutliers, mapping = aes(x = Gender, fill = Married)) +
  geom_bar(position = position_dodge(preserve = "single")) +
  theme_minimal()
plot2_G_E <- ggplot(data = NoOutliers, mapping = aes(x = Gender, fill = Education)) +
  geom_bar(position = position_dodge(preserve = "single")) +
  theme_minimal()
plot2_M_E <- ggplot(data = NoOutliers, mapping = aes(x = Married, fill = Education)) +
  geom_bar(position = position_dodge(preserve = "single")) +
  theme_minimal()
Plot2_E_PA <- ggplot(data = NoOutliers, mapping = aes(x = Education, fill = Property_Area)) +
  geom_bar(position = position_dodge(preserve = "single")) +
  theme_minimal()

ggarrange(plot2_G_M, plot2_G_E, plot2_M_E, Plot2_E_PA)

2.2.2 Data Numerikal vs Numerikal

# Scatter plots for every ordered pair of the three numeric variables.
# Points are used instead of lines: the rows are independent loan
# applications, so joining them in x order would draw a path that has no
# meaning.
plot2_LA_CI <- ggplot(NoOutliers, aes(x = LoanAmount, y = CoapplicantIncome)) +
  theme_minimal() +
  geom_point()
plot2_LA_AI <- ggplot(NoOutliers, aes(x = LoanAmount, y = ApplicantIncome)) +
  theme_minimal() +
  geom_point()
plot2_AI_CI <- ggplot(NoOutliers, aes(x = ApplicantIncome, y = CoapplicantIncome)) +
  theme_minimal() +
  geom_point()
plot2_CI_LA <- ggplot(NoOutliers, aes(x = CoapplicantIncome, y = LoanAmount)) +
  theme_minimal() +
  geom_point()
plot2_AI_LA <- ggplot(NoOutliers, aes(x = ApplicantIncome, y = LoanAmount)) +
  theme_minimal() +
  geom_point()
plot2_CI_AI <- ggplot(NoOutliers, aes(x = CoapplicantIncome, y = ApplicantIncome)) +
  theme_minimal() +
  geom_point()


ggarrange(
  plot2_LA_CI, plot2_LA_AI, plot2_AI_CI,
  plot2_CI_LA, plot2_AI_LA, plot2_CI_AI
)

2.2.3 Data Numerikal vs Kategorikal

# Overlaid density curves of a numeric variable, one curve per category.

# Loan amount split by loan status. The title used to name "Loan Amount
# Term", which is not the variable mapped to `fill`.
plot2_LA_LS <- ggplot(NoOutliers, aes(x = LoanAmount, fill = Loan_Status)) +
  geom_density(alpha = 0.3) +
  theme_minimal() +
  labs(title = "Loan Amount distribution by Loan Status")

plot2_CI_M <- ggplot(NoOutliers, aes(x = CoapplicantIncome, fill = Married)) +
  geom_density(alpha = 0.3) +
  theme_minimal() +
  labs(title = "Coapplicant Income distribution by Married")

plot2_AI_E <- ggplot(NoOutliers, aes(x = ApplicantIncome, fill = Education)) +
  geom_density(alpha = 0.3) +
  theme_minimal() +
  labs(title = "Applicant Income distribution by Education")
# Despite the "LA" in its name, this plot shows ApplicantIncome (as its
# title says); the name is kept so later references still work.
plot2_LA_PA <- ggplot(NoOutliers, aes(x = ApplicantIncome, fill = Property_Area)) +
  geom_density(alpha = 0.3) +
  theme_minimal() +
  labs(title = "Applicant Income distribution by Property Area")

ggarrange(plot2_LA_LS, plot2_CI_M, plot2_AI_E, plot2_LA_PA)

2.3 Visualisasi Multivariabel

# Scatter plots of two numeric variables with one or two categorical
# variables encoded as point shape and colour.

# Shape and colour both encode Property_Area.
plot3_AI_CI_PA <- ggplot(
  NoOutliers,
  aes(x = ApplicantIncome, y = CoapplicantIncome,
      shape = Property_Area, colour = Property_Area)
) + geom_point()
plot3_AI_LA_PA <- ggplot(
  NoOutliers,
  aes(x = ApplicantIncome, y = LoanAmount,
      shape = Property_Area, colour = Property_Area)
) + geom_point()
plot3_LA_CI_PA <- ggplot(
  NoOutliers,
  aes(x = LoanAmount, y = CoapplicantIncome,
      shape = Property_Area, colour = Property_Area)
) + geom_point()

# Shape encodes Loan_Amount_Term, colour encodes Property_Area.
# Loan_Amount_Term has more categories than ggplot2's default shape scale
# supplies, and points in the surplus categories would not be drawn. One
# shape code per category is therefore given explicitly.
plot3_AI_CI_LAT <- ggplot(
  NoOutliers,
  aes(x = ApplicantIncome, y = CoapplicantIncome,
      shape = Loan_Amount_Term, colour = Property_Area)
) +
  geom_point() +
  scale_shape_manual(values = seq_len(n_distinct(NoOutliers$Loan_Amount_Term)))
plot3_AI_LA_LAT <- ggplot(
  NoOutliers,
  aes(x = ApplicantIncome, y = LoanAmount,
      shape = Loan_Amount_Term, colour = Property_Area)
) +
  geom_point() +
  scale_shape_manual(values = seq_len(n_distinct(NoOutliers$Loan_Amount_Term)))
plot3_LA_CI_LAT <- ggplot(
  NoOutliers,
  aes(x = LoanAmount, y = CoapplicantIncome,
      shape = Loan_Amount_Term, colour = Property_Area)
) +
  geom_point() +
  scale_shape_manual(values = seq_len(n_distinct(NoOutliers$Loan_Amount_Term)))

# Shape encodes Education, colour encodes Property_Area.
plot3_AI_CI_E <- ggplot(
  NoOutliers,
  aes(x = ApplicantIncome, y = CoapplicantIncome,
      shape = Education, colour = Property_Area)
) + geom_point()
plot3_AI_LA_E <- ggplot(
  NoOutliers,
  aes(x = ApplicantIncome, y = LoanAmount,
      shape = Education, colour = Property_Area)
) + geom_point()
plot3_LA_CI_E <- ggplot(
  NoOutliers,
  aes(x = LoanAmount, y = CoapplicantIncome,
      shape = Education, colour = Property_Area)
) + geom_point()



plot3_AI_CI_PA

plot3_AI_LA_PA

plot3_LA_CI_PA

plot3_AI_CI_LAT

plot3_AI_LA_LAT

plot3_LA_CI_LAT

plot3_AI_CI_E

plot3_AI_LA_E

plot3_LA_CI_E

3 Tugas 3

Lakukan proses analisa data secara deskriptif menggunakan R dengan beberapa langkah berikut:

3.1 Kualitatif

3.1.1 Kategori Univariat

# Relative frequency of every level, one categorical column at a time.
table(NoOutliers$Gender) %>% prop.table()
## 
##    Female      Male 
## 0.1869159 0.8130841
table(NoOutliers$Married) %>% prop.table()
## 
##        No       Yes 
## 0.3495327 0.6504673
table(NoOutliers$Dependents) %>% prop.table()
## 
##          0          1          2         3+ 
## 0.60000000 0.15700935 0.16822430 0.07476636
table(NoOutliers$Education) %>% prop.table()
## 
##     Graduate Not Graduate 
##    0.7551402    0.2448598
table(NoOutliers$Self_Employed) %>% prop.table()
## 
##        No       Yes 
## 0.8859813 0.1140187
table(NoOutliers$Loan_Amount_Term) %>% prop.table()
## 
##          12         120         180         240         300          36 
## 0.001869159 0.005607477 0.067289720 0.007476636 0.018691589 0.003738318 
##         360         480          60          84 
## 0.857943925 0.026168224 0.003738318 0.007476636
table(NoOutliers$Credit_History) %>% prop.table()
## 
##         0         1 
## 0.1457944 0.8542056
table(NoOutliers$Property_Area) %>% prop.table()
## 
##     Rural Semiurban     Urban 
## 0.2990654 0.3831776 0.3177570
table(NoOutliers$Loan_Status) %>% prop.table()
## 
##         N         Y 
## 0.3046729 0.6953271

3.1.2 Kategori Bivariat

library(dplyr)                                                # for data manipulation
library(magrittr)                                             # for data manipulation similar to dplyr

# Two-way contingency tables (counts) for selected pairs of categorical
# variables.
with(NoOutliers, table(Gender, Married))
##         Married
## Gender    No Yes
##   Female  72  28
##   Male   115 320
with(NoOutliers, table(Gender, Education))
##         Education
## Gender   Graduate Not Graduate
##   Female       82           18
##   Male        322          113
with(NoOutliers, table(Gender, Property_Area))
##         Property_Area
## Gender   Rural Semiurban Urban
##   Female    24        49    27
##   Male     136       156   143
with(NoOutliers, table(Education, Self_Employed))
##               Self_Employed
## Education       No Yes
##   Graduate     358  46
##   Not Graduate 116  15
with(NoOutliers, table(Gender, Loan_Amount_Term))
##         Loan_Amount_Term
## Gender    12 120 180 240 300  36 360 480  60  84
##   Female   0   0   2   1   1   1  90   4   0   1
##   Male     1   3  34   3   9   1 369  10   2   3
with(NoOutliers, table(Married, Loan_Amount_Term))
##        Loan_Amount_Term
## Married  12 120 180 240 300  36 360 480  60  84
##     No    0   1   7   1   3   2 164   8   1   0
##     Yes   1   2  29   3   7   0 295   6   1   4

3.1.3 Kategori Multivariat

# Flat three-way contingency table: Gender x Married against Education.
ftable(select(NoOutliers, Gender, Married, Education))
##                Education Graduate Not Graduate
## Gender Married                                
## Female No                      60           12
##        Yes                     22            6
## Male   No                      84           31
##        Yes                    238           82
# Flat five-way contingency table with Loan_Amount_Term across the columns.
ftable(select(
  NoOutliers,
  Gender, Married, Education, Property_Area, Loan_Amount_Term
))
##                                           Loan_Amount_Term 12 120 180 240 300 36 360 480 60 84
## Gender Married Education    Property_Area                                                     
## Female No      Graduate     Rural                           0   0   0   0   0  0  13   2  0  0
##                             Semiurban                       0   0   0   0   1  1  23   0  0  0
##                             Urban                           0   0   1   0   0  0  18   1  0  0
##                Not Graduate Rural                           0   0   0   0   0  0   4   0  0  0
##                             Semiurban                       0   0   0   0   0  0   5   0  0  0
##                             Urban                           0   0   0   0   0  0   3   0  0  0
##        Yes     Graduate     Rural                           0   0   0   0   0  0   3   0  0  0
##                             Semiurban                       0   0   1   1   0  0  11   1  0  1
##                             Urban                           0   0   0   0   0  0   4   0  0  0
##                Not Graduate Rural                           0   0   0   0   0  0   2   0  0  0
##                             Semiurban                       0   0   0   0   0  0   4   0  0  0
##                             Urban                           0   0   0   0   0  0   0   0  0  0
## Male   No      Graduate     Rural                           0   0   0   0   0  0  27   0  0  0
##                             Semiurban                       0   0   1   0   1  0  23   2  0  0
##                             Urban                           0   0   3   0   0  0  24   2  1  0
##                Not Graduate Rural                           0   0   0   0   1  0  12   0  0  0
##                             Semiurban                       0   1   0   0   0  1   9   0  0  0
##                             Urban                           0   0   2   1   0  0   3   1  0  0
##        Yes     Graduate     Rural                           0   0   7   0   1  0  57   0  0  2
##                             Semiurban                       0   1   3   1   3  0  84   2  0  0
##                             Urban                           1   1   5   1   1  0  67   0  0  1
##                Not Graduate Rural                           0   0   3   0   0  0  26   0  0  0
##                             Semiurban                       0   0   2   0   1  0  19   2  0  0
##                             Urban                           0   0   8   0   1  0  18   1  1  0

3.2 Kuantitatif

3.2.1 Univariat Numerik

3.2.1.1 Summary

# Numeric columns of the outlier-free data, followed by summary()'s
# per-column statistics.
NoOutliersNum <- NoOutliers %>% select_if(is.numeric)
summary(NoOutliersNum)
##  ApplicantIncome CoapplicantIncome   LoanAmount   
##  Min.   :  150   Min.   :   0      Min.   :  9.0  
##  1st Qu.: 2752   1st Qu.:   0      1st Qu.:100.0  
##  Median : 3598   Median :1260      Median :124.0  
##  Mean   : 4054   Mean   :1323      Mean   :127.0  
##  3rd Qu.: 4891   3rd Qu.:2194      3rd Qu.:151.5  
##  Max.   :10139   Max.   :5701      Max.   :260.0

3.2.1.2 Variance

# Sample variance of each numeric column.
vapply(NoOutliersNum, var, numeric(1))
##   ApplicantIncome CoapplicantIncome        LoanAmount 
##       3435005.100       2019826.684          1990.793

3.2.1.3 Standard Deviation

# Sample standard deviation of each numeric column.
vapply(NoOutliersNum, sd, numeric(1))
##   ApplicantIncome CoapplicantIncome        LoanAmount 
##        1853.37668        1421.20607          44.61831

3.2.1.4 Median Absolute Deviation

# Median absolute deviation of each numeric column.
vapply(NoOutliersNum, mad, numeric(1))
##   ApplicantIncome CoapplicantIncome        LoanAmount 
##         1504.8390         1868.0760           38.5476

3.2.1.5 Interquartile Range

# Interquartile range of each numeric column.
vapply(NoOutliersNum, IQR, numeric(1))
##   ApplicantIncome CoapplicantIncome        LoanAmount 
##            2138.5            2194.0              51.5

3.2.1.6 Skewness

# Skewness of each numeric column.
vapply(NoOutliersNum, skewness, numeric(1))
##   ApplicantIncome CoapplicantIncome        LoanAmount 
##         1.1358372         0.8411675         0.3989309

3.2.1.7 Kurtosis

# Kurtosis of each numeric column.
vapply(NoOutliersNum, kurtosis, numeric(1))
##   ApplicantIncome CoapplicantIncome        LoanAmount 
##        1.19274525        0.01943741        0.47240548

3.2.2 Bivariat Numerik

3.2.2.1 Covariance

# Covariance of each pair of numeric variables.
with(NoOutliers, cov(ApplicantIncome, CoapplicantIncome))
## [1] -702662.6
with(NoOutliers, cov(CoapplicantIncome, LoanAmount))
## [1] 18225.52
with(NoOutliers, cov(LoanAmount, ApplicantIncome))
## [1] 39323.8

3.2.2.2 Correlation

# Correlation (cor()'s default method) of each pair of numeric variables.
with(NoOutliers, cor(ApplicantIncome, CoapplicantIncome))
## [1] -0.2667633
with(NoOutliers, cor(CoapplicantIncome, LoanAmount))
## [1] 0.2874152
with(NoOutliers, cor(LoanAmount, ApplicantIncome))
## [1] 0.4755308

3.2.3 Multivariat Numerik

# Covariance and correlation matrices of all numeric columns. var() on a
# data frame gives the same matrix as cov(), as the output below shows.
NoOutliersNum %>% cov()
##                   ApplicantIncome CoapplicantIncome LoanAmount
## ApplicantIncome         3435005.1        -702662.55  39323.796
## CoapplicantIncome       -702662.6        2019826.68  18225.520
## LoanAmount                39323.8          18225.52   1990.793
NoOutliersNum %>% cor()
##                   ApplicantIncome CoapplicantIncome LoanAmount
## ApplicantIncome         1.0000000        -0.2667633  0.4755308
## CoapplicantIncome      -0.2667633         1.0000000  0.2874152
## LoanAmount              0.4755308         0.2874152  1.0000000
NoOutliersNum %>% var()
##                   ApplicantIncome CoapplicantIncome LoanAmount
## ApplicantIncome         3435005.1        -702662.55  39323.796
## CoapplicantIncome       -702662.6        2019826.68  18225.520
## LoanAmount                39323.8          18225.52   1990.793

3.3 EDA in Lazy Way

library(funModeling) 
library(tidyverse) 
library(Hmisc)
library(skimr)

# Run a battery of exploratory summaries on a data frame in one call.
#
# data: data frame to explore.
# Returns the result of describe(data), which is shown when the function is
# called at top level.
#
# Values are not auto-printed inside a function, so the results of skim()
# and profiling_num() are printed explicitly; without that they are computed
# and then silently discarded.
basic_eda <- function(data) {
  glimpse(data)
  print(skim(data))
  df_status(data)
  freq(data)
  print(profiling_num(data))
  plot_num(data)
  describe(data)
}

basic_eda(NoOutliers)
## Rows: 535
## Columns: 13
## $ ï..Loan_ID        <chr> "LP001002", "LP001003", "LP001005", "LP001006", "LP0~
## $ Gender            <chr> "Male", "Male", "Male", "Male", "Male", "Male", "Mal~
## $ Married           <chr> "No", "Yes", "Yes", "Yes", "No", "Yes", "Yes", "Yes"~
## $ Dependents        <chr> "0", "1", "0", "0", "0", "0", "3+", "2", "2", "2", "~
## $ Education         <chr> "Graduate", "Graduate", "Graduate", "Not Graduate", ~
## $ Self_Employed     <chr> "No", "No", "Yes", "No", "No", "No", "No", "No", "No~
## $ ApplicantIncome   <int> 5849, 4583, 3000, 2583, 6000, 2333, 3036, 4006, 3200~
## $ CoapplicantIncome <dbl> 0, 1508, 0, 2358, 0, 1516, 2504, 1526, 700, 1840, 28~
## $ LoanAmount        <dbl> 146.4122, 128.0000, 66.0000, 120.0000, 141.0000, 95.~
## $ Loan_Amount_Term  <chr> "360", "360", "360", "360", "360", "360", "360", "36~
## $ Credit_History    <chr> "1", "1", "1", "1", "1", "1", "0", "1", "1", "1", "1~
## $ Property_Area     <chr> "Urban", "Rural", "Urban", "Urban", "Urban", "Urban"~
## $ Loan_Status       <chr> "Y", "N", "Y", "Y", "Y", "Y", "N", "Y", "Y", "Y", "N~
##             variable q_zeros p_zeros q_na p_na q_inf p_inf      type unique
## 1         ï..Loan_ID       0    0.00    0    0     0     0 character    535
## 2             Gender       0    0.00    0    0     0     0 character      2
## 3            Married       0    0.00    0    0     0     0 character      2
## 4         Dependents     321   60.00    0    0     0     0 character      4
## 5          Education       0    0.00    0    0     0     0 character      2
## 6      Self_Employed       0    0.00    0    0     0     0 character      2
## 7    ApplicantIncome       0    0.00    0    0     0     0   integer    438
## 8  CoapplicantIncome     229   42.80    0    0     0     0   numeric    259
## 9         LoanAmount       0    0.00    0    0     0     0   numeric    161
## 10  Loan_Amount_Term       0    0.00    0    0     0     0 character     10
## 11    Credit_History      78   14.58    0    0     0     0 character      2
## 12     Property_Area       0    0.00    0    0     0     0 character      3
## 13       Loan_Status       0    0.00    0    0     0     0 character      2
##     ï..Loan_ID frequency percentage cumulative_perc
## 1     LP001002         1       0.19            0.19
## 2     LP001003         1       0.19            0.38
## 3     LP001005         1       0.19            0.57
## 4     LP001006         1       0.19            0.76
## 5     LP001008         1       0.19            0.95
## 6     LP001013         1       0.19            1.14
## 7     LP001014         1       0.19            1.33
## 8     LP001018         1       0.19            1.52
## 9     LP001024         1       0.19            1.71
## 10    LP001027         1       0.19            1.90
## 11    LP001029         1       0.19            2.09
## 12    LP001030         1       0.19            2.28
## 13    LP001032         1       0.19            2.47
## 14    LP001034         1       0.19            2.66
## 15    LP001036         1       0.19            2.85
## 16    LP001038         1       0.19            3.04
## 17    LP001041         1       0.19            3.23
## 18    LP001043         1       0.19            3.42
## 19    LP001047         1       0.19            3.61
## 20    LP001050         1       0.19            3.80
## 21    LP001052         1       0.19            3.99
## 22    LP001066         1       0.19            4.18
## 23    LP001068         1       0.19            4.37
## 24    LP001073         1       0.19            4.56
## 25    LP001086         1       0.19            4.75
## 26    LP001087         1       0.19            4.94
## 27    LP001091         1       0.19            5.13
## 28    LP001095         1       0.19            5.32
## 29    LP001097         1       0.19            5.51
## 30    LP001098         1       0.19            5.70
## 31    LP001106         1       0.19            5.89
## 32    LP001109         1       0.19            6.08
## 33    LP001112         1       0.19            6.27
## 34    LP001116         1       0.19            6.46
## 35    LP001119         1       0.19            6.65
## 36    LP001120         1       0.19            6.84
## 37    LP001123         1       0.19            7.03
## 38    LP001131         1       0.19            7.22
## 39    LP001136         1       0.19            7.41
## 40    LP001137         1       0.19            7.60
## 41    LP001138         1       0.19            7.79
## 42    LP001144         1       0.19            7.98
## 43    LP001146         1       0.19            8.17
## 44    LP001151         1       0.19            8.36
## 45    LP001155         1       0.19            8.55
## 46    LP001157         1       0.19            8.74
## 47    LP001164         1       0.19            8.93
## 48    LP001179         1       0.19            9.12
## 49    LP001194         1       0.19            9.31
## 50    LP001195         1       0.19            9.50
## 51    LP001197         1       0.19            9.69
## 52    LP001198         1       0.19            9.88
## 53    LP001199         1       0.19           10.07
## 54    LP001205         1       0.19           10.26
## 55    LP001206         1       0.19           10.45
## 56    LP001207         1       0.19           10.64
## 57    LP001213         1       0.19           10.83
## 58    LP001222         1       0.19           11.02
## 59    LP001225         1       0.19           11.21
## 60    LP001228         1       0.19           11.40
## 61    LP001238         1       0.19           11.59
## 62    LP001241         1       0.19           11.78
## 63    LP001243         1       0.19           11.97
## 64    LP001245         1       0.19           12.16
## 65    LP001248         1       0.19           12.35
## 66    LP001250         1       0.19           12.54
## 67    LP001253         1       0.19           12.73
## 68    LP001255         1       0.19           12.92
## 69    LP001256         1       0.19           13.11
## 70    LP001259         1       0.19           13.30
## 71    LP001263         1       0.19           13.49
## 72    LP001264         1       0.19           13.68
## 73    LP001265         1       0.19           13.87
## 74    LP001266         1       0.19           14.06
## 75    LP001267         1       0.19           14.25
## 76    LP001275         1       0.19           14.44
## 77    LP001279         1       0.19           14.63
## 78    LP001280         1       0.19           14.82
## 79    LP001282         1       0.19           15.01
## 80    LP001289         1       0.19           15.20
## 81    LP001310         1       0.19           15.39
## 82    LP001316         1       0.19           15.58
## 83    LP001318         1       0.19           15.77
## 84    LP001319         1       0.19           15.96
## 85    LP001322         1       0.19           16.15
## 86    LP001325         1       0.19           16.34
## 87    LP001326         1       0.19           16.53
## 88    LP001327         1       0.19           16.72
## 89    LP001333         1       0.19           16.91
## 90    LP001334         1       0.19           17.10
## 91    LP001343         1       0.19           17.29
## 92    LP001345         1       0.19           17.48
## 93    LP001349         1       0.19           17.67
## 94    LP001356         1       0.19           17.86
## 95    LP001357         1       0.19           18.05
## 96    LP001367         1       0.19           18.24
## 97    LP001370         1       0.19           18.43
## 98    LP001379         1       0.19           18.62
## 99    LP001384         1       0.19           18.81
## 100   LP001385         1       0.19           19.00
## 101   LP001387         1       0.19           19.19
## 102   LP001391         1       0.19           19.38
## 103   LP001392         1       0.19           19.57
## 104   LP001398         1       0.19           19.76
## 105   LP001404         1       0.19           19.95
## 106   LP001405         1       0.19           20.14
## 107   LP001421         1       0.19           20.33
## 108   LP001426         1       0.19           20.52
## 109   LP001430         1       0.19           20.71
## 110   LP001432         1       0.19           20.90
## 111   LP001439         1       0.19           21.09
## 112   LP001443         1       0.19           21.28
## 113   LP001449         1       0.19           21.47
## 114   LP001465         1       0.19           21.66
## 115   LP001473         1       0.19           21.85
## 116   LP001478         1       0.19           22.04
## 117   LP001482         1       0.19           22.23
## 118   LP001487         1       0.19           22.42
## 119   LP001489         1       0.19           22.61
## 120   LP001491         1       0.19           22.80
## 121   LP001493         1       0.19           22.99
## 122   LP001497         1       0.19           23.18
## 123   LP001498         1       0.19           23.37
## 124   LP001504         1       0.19           23.56
## 125   LP001507         1       0.19           23.75
## 126   LP001514         1       0.19           23.94
## 127   LP001518         1       0.19           24.13
## 128   LP001519         1       0.19           24.32
## 129   LP001520         1       0.19           24.51
## 130   LP001528         1       0.19           24.70
## 131   LP001529         1       0.19           24.89
## 132   LP001531         1       0.19           25.08
## 133   LP001532         1       0.19           25.27
## 134   LP001535         1       0.19           25.46
## 135   LP001541         1       0.19           25.65
## 136   LP001543         1       0.19           25.84
## 137   LP001546         1       0.19           26.03
## 138   LP001552         1       0.19           26.22
## 139   LP001560         1       0.19           26.41
## 140   LP001565         1       0.19           26.60
## 141   LP001570         1       0.19           26.79
## 142   LP001572         1       0.19           26.98
## 143   LP001574         1       0.19           27.17
## 144   LP001577         1       0.19           27.36
## 145   LP001578         1       0.19           27.55
## 146   LP001579         1       0.19           27.74
## 147   LP001580         1       0.19           27.93
## 148   LP001581         1       0.19           28.12
## 149   LP001586         1       0.19           28.31
## 150   LP001594         1       0.19           28.50
## 151   LP001603         1       0.19           28.69
## 152   LP001606         1       0.19           28.88
## 153   LP001608         1       0.19           29.07
## 154   LP001616         1       0.19           29.26
## 155   LP001630         1       0.19           29.45
## 156   LP001634         1       0.19           29.64
## 157   LP001636         1       0.19           29.83
## 158   LP001639         1       0.19           30.02
## 159   LP001641         1       0.19           30.21
## 160   LP001643         1       0.19           30.40
## 161   LP001644         1       0.19           30.59
## 162   LP001647         1       0.19           30.78
## 163   LP001653         1       0.19           30.97
## 164   LP001657         1       0.19           31.16
## 165   LP001658         1       0.19           31.35
## 166   LP001664         1       0.19           31.54
## 167   LP001665         1       0.19           31.73
## 168   LP001666         1       0.19           31.92
## 169   LP001669         1       0.19           32.11
## 170   LP001671         1       0.19           32.30
## 171   LP001674         1       0.19           32.49
## 172   LP001677         1       0.19           32.68
## 173   LP001682         1       0.19           32.87
## 174   LP001688         1       0.19           33.06
## 175   LP001691         1       0.19           33.25
## 176   LP001692         1       0.19           33.44
## 177   LP001693         1       0.19           33.63
## 178   LP001698         1       0.19           33.82
## 179   LP001699         1       0.19           34.01
## 180   LP001702         1       0.19           34.20
## 181   LP001708         1       0.19           34.39
## 182   LP001711         1       0.19           34.58
## 183   LP001713         1       0.19           34.77
## 184   LP001715         1       0.19           34.96
## 185   LP001716         1       0.19           35.15
## 186   LP001720         1       0.19           35.34
## 187   LP001722         1       0.19           35.53
## 188   LP001726         1       0.19           35.72
## 189   LP001732         1       0.19           35.91
## 190   LP001734         1       0.19           36.10
## 191   LP001736         1       0.19           36.29
## 192   LP001743         1       0.19           36.48
## 193   LP001744         1       0.19           36.67
## 194   LP001749         1       0.19           36.86
## 195   LP001750         1       0.19           37.05
## 196   LP001751         1       0.19           37.24
## 197   LP001754         1       0.19           37.43
## 198   LP001758         1       0.19           37.62
## 199   LP001760         1       0.19           37.81
## 200   LP001761         1       0.19           38.00
## 201   LP001765         1       0.19           38.19
## 202   LP001768         1       0.19           38.38
## 203   LP001770         1       0.19           38.57
## 204   LP001778         1       0.19           38.76
## 205   LP001784         1       0.19           38.95
## 206   LP001786         1       0.19           39.14
## 207   LP001788         1       0.19           39.33
## 208   LP001790         1       0.19           39.52
## 209   LP001792         1       0.19           39.71
## 210   LP001798         1       0.19           39.90
## 211   LP001800         1       0.19           40.09
## 212   LP001806         1       0.19           40.28
## 213   LP001807         1       0.19           40.47
## 214   LP001811         1       0.19           40.66
## 215   LP001813         1       0.19           40.85
## 216   LP001814         1       0.19           41.04
## 217   LP001819         1       0.19           41.23
## 218   LP001824         1       0.19           41.42
## 219   LP001825         1       0.19           41.61
## 220   LP001835         1       0.19           41.80
## 221   LP001836         1       0.19           41.99
## 222   LP001841         1       0.19           42.18
## 223   LP001846         1       0.19           42.37
## 224   LP001849         1       0.19           42.56
## 225   LP001854         1       0.19           42.75
## 226   LP001864         1       0.19           42.94
## 227   LP001868         1       0.19           43.13
## 228   LP001870         1       0.19           43.32
## 229   LP001871         1       0.19           43.51
## 230   LP001872         1       0.19           43.70
## 231   LP001875         1       0.19           43.89
## 232   LP001877         1       0.19           44.08
## 233   LP001882         1       0.19           44.27
## 234   LP001883         1       0.19           44.46
## 235   LP001884         1       0.19           44.65
## 236   LP001888         1       0.19           44.84
## 237   LP001892         1       0.19           45.03
## 238   LP001894         1       0.19           45.22
## 239   LP001896         1       0.19           45.41
## 240   LP001900         1       0.19           45.60
## 241   LP001903         1       0.19           45.79
## 242   LP001904         1       0.19           45.98
## 243   LP001908         1       0.19           46.17
## 244   LP001910         1       0.19           46.36
## 245   LP001914         1       0.19           46.55
## 246   LP001915         1       0.19           46.74
## 247   LP001917         1       0.19           46.93
## 248   LP001924         1       0.19           47.12
## 249   LP001925         1       0.19           47.31
## 250   LP001926         1       0.19           47.50
## 251   LP001931         1       0.19           47.69
## 252   LP001935         1       0.19           47.88
## 253   LP001936         1       0.19           48.07
## 254   LP001938         1       0.19           48.26
## 255   LP001940         1       0.19           48.45
## 256   LP001945         1       0.19           48.64
## 257   LP001947         1       0.19           48.83
## 258   LP001949         1       0.19           49.02
## 259   LP001953         1       0.19           49.21
## 260   LP001954         1       0.19           49.40
## 261   LP001955         1       0.19           49.59
## 262   LP001963         1       0.19           49.78
## 263   LP001964         1       0.19           49.97
## 264   LP001972         1       0.19           50.16
## 265   LP001974         1       0.19           50.35
## 266   LP001977         1       0.19           50.54
## 267   LP001978         1       0.19           50.73
## 268   LP001990         1       0.19           50.92
## 269   LP001993         1       0.19           51.11
## 270   LP001994         1       0.19           51.30
## 271   LP001998         1       0.19           51.49
## 272   LP002002         1       0.19           51.68
## 273   LP002004         1       0.19           51.87
## 274   LP002006         1       0.19           52.06
## 275   LP002008         1       0.19           52.25
## 276   LP002024         1       0.19           52.44
## 277   LP002031         1       0.19           52.63
## 278   LP002035         1       0.19           52.82
## 279   LP002036         1       0.19           53.01
## 280   LP002043         1       0.19           53.20
## 281   LP002050         1       0.19           53.39
## 282   LP002051         1       0.19           53.58
## 283   LP002053         1       0.19           53.77
## 284   LP002054         1       0.19           53.96
## 285   LP002055         1       0.19           54.15
## 286   LP002068         1       0.19           54.34
## 287   LP002082         1       0.19           54.53
## 288   LP002086         1       0.19           54.72
## 289   LP002087         1       0.19           54.91
## 290   LP002097         1       0.19           55.10
## 291   LP002098         1       0.19           55.29
## 292   LP002100         1       0.19           55.48
## 293   LP002103         1       0.19           55.67
## 294   LP002106         1       0.19           55.86
## 295   LP002110         1       0.19           56.05
## 296   LP002112         1       0.19           56.24
## 297   LP002113         1       0.19           56.43
## 298   LP002114         1       0.19           56.62
## 299   LP002115         1       0.19           56.81
## 300   LP002116         1       0.19           57.00
## 301   LP002119         1       0.19           57.19
## 302   LP002126         1       0.19           57.38
## 303   LP002128         1       0.19           57.57
## 304   LP002129         1       0.19           57.76
## 305   LP002130         1       0.19           57.95
## 306   LP002131         1       0.19           58.14
## 307   LP002137         1       0.19           58.33
## 308   LP002139         1       0.19           58.52
## 309   LP002141         1       0.19           58.71
## 310   LP002142         1       0.19           58.90
## 311   LP002143         1       0.19           59.09
## 312   LP002144         1       0.19           59.28
## 313   LP002149         1       0.19           59.47
## 314   LP002151         1       0.19           59.66
## 315   LP002158         1       0.19           59.85
## 316   LP002160         1       0.19           60.04
## 317   LP002161         1       0.19           60.23
## 318   LP002170         1       0.19           60.42
## 319   LP002175         1       0.19           60.61
## 320   LP002178         1       0.19           60.80
## 321   LP002180         1       0.19           60.99
## 322   LP002181         1       0.19           61.18
## 323   LP002187         1       0.19           61.37
## 324   LP002188         1       0.19           61.56
## 325   LP002190         1       0.19           61.75
## 326   LP002197         1       0.19           61.94
## 327   LP002205         1       0.19           62.13
## 328   LP002209         1       0.19           62.32
## 329   LP002211         1       0.19           62.51
## 330   LP002219         1       0.19           62.70
## 331   LP002223         1       0.19           62.89
## 332   LP002224         1       0.19           63.08
## 333   LP002225         1       0.19           63.27
## 334   LP002226         1       0.19           63.46
## 335   LP002231         1       0.19           63.65
## 336   LP002234         1       0.19           63.84
## 337   LP002236         1       0.19           64.03
## 338   LP002237         1       0.19           64.22
## 339   LP002239         1       0.19           64.41
## 340   LP002243         1       0.19           64.60
## 341   LP002244         1       0.19           64.79
## 342   LP002250         1       0.19           64.98
## 343   LP002255         1       0.19           65.17
## 344   LP002263         1       0.19           65.36
## 345   LP002265         1       0.19           65.55
## 346   LP002266         1       0.19           65.74
## 347   LP002272         1       0.19           65.93
## 348   LP002277         1       0.19           66.12
## 349   LP002281         1       0.19           66.31
## 350   LP002284         1       0.19           66.50
## 351   LP002287         1       0.19           66.69
## 352   LP002288         1       0.19           66.88
## 353   LP002296         1       0.19           67.07
## 354   LP002300         1       0.19           67.26
## 355   LP002301         1       0.19           67.45
## 356   LP002305         1       0.19           67.64
## 357   LP002308         1       0.19           67.83
## 358   LP002314         1       0.19           68.02
## 359   LP002315         1       0.19           68.21
## 360   LP002318         1       0.19           68.40
## 361   LP002319         1       0.19           68.59
## 362   LP002328         1       0.19           68.78
## 363   LP002332         1       0.19           68.97
## 364   LP002335         1       0.19           69.16
## 365   LP002337         1       0.19           69.35
## 366   LP002341         1       0.19           69.54
## 367   LP002345         1       0.19           69.73
## 368   LP002347         1       0.19           69.92
## 369   LP002348         1       0.19           70.11
## 370   LP002357         1       0.19           70.30
## 371   LP002361         1       0.19           70.49
## 372   LP002362         1       0.19           70.68
## 373   LP002366         1       0.19           70.87
## 374   LP002367         1       0.19           71.06
## 375   LP002368         1       0.19           71.25
## 376   LP002369         1       0.19           71.44
## 377   LP002370         1       0.19           71.63
## 378   LP002377         1       0.19           71.82
## 379   LP002379         1       0.19           72.01
## 380   LP002387         1       0.19           72.20
## 381   LP002390         1       0.19           72.39
## 382   LP002393         1       0.19           72.58
## 383   LP002398         1       0.19           72.77
## 384   LP002401         1       0.19           72.96
## 385   LP002407         1       0.19           73.15
## 386   LP002408         1       0.19           73.34
## 387   LP002409         1       0.19           73.53
## 388   LP002418         1       0.19           73.72
## 389   LP002429         1       0.19           73.91
## 390   LP002434         1       0.19           74.10
## 391   LP002435         1       0.19           74.29
## 392   LP002443         1       0.19           74.48
## 393   LP002444         1       0.19           74.67
## 394   LP002446         1       0.19           74.86
## 395   LP002447         1       0.19           75.05
## 396   LP002448         1       0.19           75.24
## 397   LP002449         1       0.19           75.43
## 398   LP002453         1       0.19           75.62
## 399   LP002455         1       0.19           75.81
## 400   LP002459         1       0.19           76.00
## 401   LP002467         1       0.19           76.19
## 402   LP002472         1       0.19           76.38
## 403   LP002473         1       0.19           76.57
## 404   LP002478         1       0.19           76.76
## 405   LP002484         1       0.19           76.95
## 406   LP002487         1       0.19           77.14
## 407   LP002489         1       0.19           77.33
## 408   LP002493         1       0.19           77.52
## 409   LP002494         1       0.19           77.71
## 410   LP002500         1       0.19           77.90
## 411   LP002502         1       0.19           78.09
## 412   LP002505         1       0.19           78.28
## 413   LP002515         1       0.19           78.47
## 414   LP002517         1       0.19           78.66
## 415   LP002519         1       0.19           78.85
## 416   LP002522         1       0.19           79.04
## 417   LP002524         1       0.19           79.23
## 418   LP002529         1       0.19           79.42
## 419   LP002530         1       0.19           79.61
## 420   LP002533         1       0.19           79.80
## 421   LP002534         1       0.19           79.99
## 422   LP002536         1       0.19           80.18
## 423   LP002537         1       0.19           80.37
## 424   LP002543         1       0.19           80.56
## 425   LP002544         1       0.19           80.75
## 426   LP002545         1       0.19           80.94
## 427   LP002555         1       0.19           81.13
## 428   LP002556         1       0.19           81.32
## 429   LP002560         1       0.19           81.51
## 430   LP002562         1       0.19           81.70
## 431   LP002571         1       0.19           81.89
## 432   LP002585         1       0.19           82.08
## 433   LP002586         1       0.19           82.27
## 434   LP002587         1       0.19           82.46
## 435   LP002588         1       0.19           82.65
## 436   LP002600         1       0.19           82.84
## 437   LP002602         1       0.19           83.03
## 438   LP002603         1       0.19           83.22
## 439   LP002606         1       0.19           83.41
## 440   LP002615         1       0.19           83.60
## 441   LP002618         1       0.19           83.79
## 442   LP002619         1       0.19           83.98
## 443   LP002622         1       0.19           84.17
## 444   LP002625         1       0.19           84.36
## 445   LP002626         1       0.19           84.55
## 446   LP002637         1       0.19           84.74
## 447   LP002640         1       0.19           84.93
## 448   LP002643         1       0.19           85.12
## 449   LP002659         1       0.19           85.31
## 450   LP002670         1       0.19           85.50
## 451   LP002682         1       0.19           85.69
## 452   LP002683         1       0.19           85.88
## 453   LP002684         1       0.19           86.07
## 454   LP002689         1       0.19           86.26
## 455   LP002690         1       0.19           86.45
## 456   LP002692         1       0.19           86.64
## 457   LP002697         1       0.19           86.83
## 458   LP002705         1       0.19           87.02
## 459   LP002706         1       0.19           87.21
## 460   LP002714         1       0.19           87.40
## 461   LP002716         1       0.19           87.59
## 462   LP002717         1       0.19           87.78
## 463   LP002720         1       0.19           87.97
## 464   LP002723         1       0.19           88.16
## 465   LP002732         1       0.19           88.35
## 466   LP002738         1       0.19           88.54
## 467   LP002739         1       0.19           88.73
## 468   LP002740         1       0.19           88.92
## 469   LP002741         1       0.19           89.11
## 470   LP002743         1       0.19           89.30
## 471   LP002753         1       0.19           89.49
## 472   LP002755         1       0.19           89.68
## 473   LP002757         1       0.19           89.87
## 474   LP002767         1       0.19           90.06
## 475   LP002768         1       0.19           90.25
## 476   LP002772         1       0.19           90.44
## 477   LP002776         1       0.19           90.63
## 478   LP002777         1       0.19           90.82
## 479   LP002778         1       0.19           91.01
## 480   LP002784         1       0.19           91.20
## 481   LP002785         1       0.19           91.39
## 482   LP002788         1       0.19           91.58
## 483   LP002789         1       0.19           91.77
## 484   LP002792         1       0.19           91.96
## 485   LP002794         1       0.19           92.15
## 486   LP002795         1       0.19           92.34
## 487   LP002798         1       0.19           92.53
## 488   LP002804         1       0.19           92.72
## 489   LP002807         1       0.19           92.91
## 490   LP002820         1       0.19           93.10
## 491   LP002821         1       0.19           93.29
## 492   LP002832         1       0.19           93.48
## 493   LP002833         1       0.19           93.67
## 494   LP002836         1       0.19           93.86
## 495   LP002837         1       0.19           94.05
## 496   LP002840         1       0.19           94.24
## 497   LP002841         1       0.19           94.43
## 498   LP002842         1       0.19           94.62
## 499   LP002847         1       0.19           94.81
## 500   LP002862         1       0.19           95.00
## 501   LP002863         1       0.19           95.19
## 502   LP002868         1       0.19           95.38
## 503   LP002872         1       0.19           95.57
## 504   LP002874         1       0.19           95.76
## 505   LP002877         1       0.19           95.95
## 506   LP002888         1       0.19           96.14
## 507   LP002892         1       0.19           96.33
## 508   LP002894         1       0.19           96.52
## 509   LP002898         1       0.19           96.71
## 510   LP002911         1       0.19           96.90
## 511   LP002912         1       0.19           97.09
## 512   LP002916         1       0.19           97.28
## 513   LP002917         1       0.19           97.47
## 514   LP002925         1       0.19           97.66
## 515   LP002926         1       0.19           97.85
## 516   LP002928         1       0.19           98.04
## 517   LP002931         1       0.19           98.23
## 518   LP002936         1       0.19           98.42
## 519   LP002940         1       0.19           98.61
## 520   LP002941         1       0.19           98.80
## 521   LP002943         1       0.19           98.99
## 522   LP002945         1       0.19           99.18
## 523   LP002948         1       0.19           99.37
## 524   LP002950         1       0.19           99.56
## 525   LP002953         1       0.19           99.75
## 526   LP002958         1       0.19           99.94
## 527   LP002960         1       0.19          100.13
## 528   LP002961         1       0.19          100.32
## 529   LP002964         1       0.19          100.51
## 530   LP002974         1       0.19          100.70
## 531   LP002978         1       0.19          100.89
## 532   LP002979         1       0.19          101.08
## 533   LP002983         1       0.19          101.27
## 534   LP002984         1       0.19          101.46
## 535   LP002990         1       0.19          100.00

##   Gender frequency percentage cumulative_perc
## 1   Male       435      81.31           81.31
## 2 Female       100      18.69          100.00

##   Married frequency percentage cumulative_perc
## 1     Yes       348      65.05           65.05
## 2      No       187      34.95          100.00

##   Dependents frequency percentage cumulative_perc
## 1          0       321      60.00           60.00
## 2          2        90      16.82           76.82
## 3          1        84      15.70           92.52
## 4         3+        40       7.48          100.00

##      Education frequency percentage cumulative_perc
## 1     Graduate       404      75.51           75.51
## 2 Not Graduate       131      24.49          100.00

##   Self_Employed frequency percentage cumulative_perc
## 1            No       474       88.6            88.6
## 2           Yes        61       11.4           100.0

##    Loan_Amount_Term frequency percentage cumulative_perc
## 1               360       459      85.79           85.79
## 2               180        36       6.73           92.52
## 3               480        14       2.62           95.14
## 4               300        10       1.87           97.01
## 5               240         4       0.75           97.76
## 6                84         4       0.75           98.51
## 7               120         3       0.56           99.07
## 8                36         2       0.37           99.44
## 9                60         2       0.37           99.81
## 10               12         1       0.19          100.00

##   Credit_History frequency percentage cumulative_perc
## 1              1       457      85.42           85.42
## 2              0        78      14.58          100.00

##   Property_Area frequency percentage cumulative_perc
## 1     Semiurban       205      38.32           38.32
## 2         Urban       170      31.78           70.10
## 3         Rural       160      29.91          100.00

##   Loan_Status frequency percentage cumulative_perc
## 1           Y       372      69.53           69.53
## 2           N       163      30.47          100.00

## data 
## 
##  13  Variables      535  Observations
## --------------------------------------------------------------------------------
## ï..Loan_ID 
##        n  missing distinct 
##      535        0      535 
## 
## lowest : LP001002 LP001003 LP001005 LP001006 LP001008
## highest: LP002978 LP002979 LP002983 LP002984 LP002990
## --------------------------------------------------------------------------------
## Gender 
##        n  missing distinct 
##      535        0        2 
##                         
## Value      Female   Male
## Frequency     100    435
## Proportion  0.187  0.813
## --------------------------------------------------------------------------------
## Married 
##        n  missing distinct 
##      535        0        2 
##                     
## Value        No  Yes
## Frequency   187  348
## Proportion 0.35 0.65
## --------------------------------------------------------------------------------
## Dependents 
##        n  missing distinct 
##      535        0        4 
##                                   
## Value          0     1     2    3+
## Frequency    321    84    90    40
## Proportion 0.600 0.157 0.168 0.075
## --------------------------------------------------------------------------------
## Education 
##        n  missing distinct 
##      535        0        2 
##                                     
## Value          Graduate Not Graduate
## Frequency           404          131
## Proportion        0.755        0.245
## --------------------------------------------------------------------------------
## Self_Employed 
##        n  missing distinct 
##      535        0        2 
##                       
## Value         No   Yes
## Frequency    474    61
## Proportion 0.886 0.114
## --------------------------------------------------------------------------------
## ApplicantIncome 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      535        0      438        1     4054     1986     1878     2200 
##      .25      .50      .75      .90      .95 
##     2752     3598     4891     6467     8022 
## 
## lowest :   150   210   645   674  1000, highest:  9833  9963 10000 10047 10139
## --------------------------------------------------------------------------------
## CoapplicantIncome 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      535        0      259    0.922     1323     1528        0        0 
##      .25      .50      .75      .90      .95 
##        0     1260     2194     3258     4130 
## 
## lowest :    0.00   16.12  189.00  240.00  242.00
## highest: 5500.00 5624.00 5625.00 5654.00 5701.00
## --------------------------------------------------------------------------------
## LoanAmount 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      535        0      161        1      127    49.51     55.7     71.0 
##      .25      .50      .75      .90      .95 
##    100.0    124.0    151.5    185.6    205.6 
## 
## lowest :   9  17  25  26  30, highest: 253 255 258 259 260
## --------------------------------------------------------------------------------
## Loan_Amount_Term 
##        n  missing distinct 
##      535        0       10 
## 
## lowest : 12  120 180 240 300, highest: 36  360 480 60  84 
##                                                                       
## Value         12   120   180   240   300    36   360   480    60    84
## Frequency      1     3    36     4    10     2   459    14     2     4
## Proportion 0.002 0.006 0.067 0.007 0.019 0.004 0.858 0.026 0.004 0.007
## --------------------------------------------------------------------------------
## Credit_History 
##        n  missing distinct 
##      535        0        2 
##                       
## Value          0     1
## Frequency     78   457
## Proportion 0.146 0.854
## --------------------------------------------------------------------------------
## Property_Area 
##        n  missing distinct 
##      535        0        3 
##                                         
## Value          Rural Semiurban     Urban
## Frequency        160       205       170
## Proportion     0.299     0.383     0.318
## --------------------------------------------------------------------------------
## Loan_Status 
##        n  missing distinct 
##      535        0        2 
##                       
## Value          N     Y
## Frequency    163   372
## Proportion 0.305 0.695
## --------------------------------------------------------------------------------

4 Tugas 4

Lakukan pemeriksaan distribusi densitas menggunakan R pada setiap variabel kuantitatif dengan beberapa bagian sebagai berikut:

4.1 Univariat numerik

# Univariate kernel density estimates, one per column of NoOutliersNum.
# Every plot requests the 50% contour level (cont = 50) and overlays the
# observations (drawpoints = TRUE).

# Column 3, labelled "Loan Amount" on the x axis.
fhat <- kde(x = NoOutliersNum[, 3])
plot_1 <- plot(
  fhat,
  cont = 50, col.cont = 4, cont.lwd = 2,
  xlab = "Loan Amount",
  drawpoints = TRUE
)

# Column 2, labelled "Coapplicant Income" on the x axis.
fhat1 <- kde(x = NoOutliersNum[, 2])
plot_2 <- plot(
  fhat1,
  cont = 50, col.cont = 4, cont.lwd = 2,
  xlab = "Coapplicant Income",
  drawpoints = TRUE
)

# Column 1, labelled "Applicant Income" on the x axis.
fhat3 <- kde(x = NoOutliersNum[, 1])
plot_3 <- plot(
  fhat3,
  cont = 50, col.cont = 4, cont.lwd = 2,
  xlab = "Applicant Income",
  drawpoints = TRUE
)

4.2 Bivariat numerik

# Bivariate kernel density estimate over columns 2 and 3 of NoOutliersNum.
fhat4 <- kde(x = NoOutliersNum[, 2:3])

# Filled-contour display with contour levels at 10%, 20%, ..., 90%.
plot(
  fhat4,
  display = "filled.contour",
  cont = seq(from = 10, to = 90, by = 10),
  lwd = 1
)

# Perspective display of the same estimate.
plot(
  fhat4,
  display = "persp",
  border = 1
)

4.3 Multivariat numerik

# Trivariate kernel density estimate over the first three columns of
# NoOutliersNum, drawn with the default plot settings for the fitted object.
fhat5 <- kde(x = NoOutliersNum[, seq_len(3)])
plot(fhat5)

5 Tugas 5

Lakukan proses pengujian Hipotesis menggunakan R pada setiap variabel kuantitatif dengan beberapa bagian sebagai berikut:

5.1 Hitunglah margin of error dan estimasi interval untuk proporsi peminjam berjenis kelamin perempuan pada tingkat kepercayaan 95%.

# Margin of error and 95% confidence interval for the proportion of female
# borrowers in NoOutliers, using the normal approximation
#   pbar +/- z_{0.975} * sqrt(pbar * (1 - pbar) / n).
library(MASS)                                          # load the MASS package
k    <- sum(NoOutliers$Gender == "Female")             # number of female borrowers
n    <- nrow(NoOutliers)                               # total number of borrowers (rows)
pbar <- k / n                                          # sample proportion of female borrowers
SE   <- sqrt(pbar * (1 - pbar) / n); SE                # standard error of the proportion
## [1] 0.01685443
E <- qnorm(.975) * SE; E                               # margin of error at 95% confidence
## [1] 0.03303407
pbar + c(-E, E)                                        # 95% confidence interval for the proportion
## [1] 0.1538818 0.2199500

5.2 Jika anda berencana menggunakan perkiraan proporsi 50% data konsumen berjenis kelamin perempuan, temukan ukuran sampel yang diperlukan untuk mencapai margin kesalahan 5% untuk data observasi pada tingkat kepercayaan 95%.

# Sample size needed to estimate a proportion with a given margin of error:
#   n = zstar^2 * p * (1 - p) / E^2
zstar <- qnorm(0.975)   # standard normal quantile for a 95% confidence level
p <- 0.5                # planned proportion estimate (50%)
E <- 0.05               # target margin of error (5%)

# Required sample size (printed unrounded).
zstar^2 * p * (1 - p) / E^2
## [1] 384.1459

5.3 Lakukan pembuktian kebenaran asumsi dengan tingkat signifikansi 0.05, jika Bank mengklaim bahwa pinjaman rata-rata konsumen adalah:

# Draw a reproducible random sample of 30 rows from NoOutliers and display it.
set.seed(100)  # fix the RNG state so the same rows are drawn on every run
DataSample <- NoOutliers %>%
  sample_n(size = 30)
DataSample

5.3.1 Lebih besar $ 150.

\[H_0: \mu \leq \$150\] \[H_1: \mu > \$150\]

# Right-tailed one-sample t-test of H0: mu <= 150 against H1: mu > 150 on
# DataSample$LoanAmount. H0 is rejected only when t exceeds t.alpha.
mu0 <- 150                                             # hypothesized value
xbar <- mean(DataSample$LoanAmount)                    # sample mean
s <- sd(DataSample$LoanAmount)                         # sample standard deviation
n <- nrow(DataSample)                                  # sample size (number of rows)
t <- (xbar - mu0) / (s / sqrt(n)); t                   # test statistic
## [1] -2.520249
alpha <- .05                                           # 0.05 significance level (right tail)
t.alpha <- qt(1 - alpha, df = n - 1)                   # right-tail critical value
t.alpha
## [1] 1.699127

Karena \(t_{hitung}<t_{table}\), maka terima \(H_0\). Sehingga dapat kita simpulkan bahwa rata-rata pinjaman konsumen lebih kecil sama dengan $150 dengan tingkat signifikansi 5%.

5.3.2 Lebih kecil $150

\[H_0: \mu \geq \$150\] \[H_1: \mu < \$150\]

# Left-tailed one-sample t-test of H0: mu >= 150 against H1: mu < 150 on
# DataSample$LoanAmount. H0 is rejected when t falls below -t.alpha.
mu0 <- 150                                           # hypothesized value
xbar <- mean(DataSample$LoanAmount)                  # sample mean
s <- sd(DataSample$LoanAmount)                       # sample standard deviation
n <- nrow(DataSample)                                # sample size (number of rows)
t <- (xbar - mu0) / (s / sqrt(n)); t                 # test statistic
## [1] -2.520249
alpha <- .05                                         # 0.05 significance level (left tail)
t.alpha <- qt(1 - alpha, df = n - 1)                 # right-tail critical value
-t.alpha                                             # left-tail critical value, by symmetry of the t distribution
## [1] -1.699127

Karena \(t_{hitung}<t_{table}\), maka tolak \(H_0\). Sehingga dapat kita simpulkan bahwa rata-rata pinjaman konsumen lebih kecil dari $150 dengan tingkat signifikansi 5%.

5.3.3 Sama dengan $ 150.

\[H_0: \mu = \$150\] \[H_1: \mu \neq \$150\]

# Two-tailed one-sample t-test of H0: mu = 150 against H1: mu != 150 on
# DataSample$LoanAmount. H0 is rejected when t lies outside
# [-t.alpha, t.alpha].
mu0 <- 150                                           # hypothesized value
xbar <- mean(DataSample$LoanAmount)                  # sample mean
s <- sd(DataSample$LoanAmount)                       # sample standard deviation
n <- nrow(DataSample)                                # sample size (number of rows)
t <- (xbar - mu0) / (s / sqrt(n)); t                 # test statistic
## [1] -2.520249
alpha <- .05                                         # 0.05 significance level, split across both tails
# A two-tailed test puts alpha / 2 in each tail, so the critical value is the
# 1 - alpha / 2 quantile (not the one-tailed 1 - alpha quantile).
t.alpha <- qt(1 - alpha / 2, df = n - 1)             # upper critical value
t.alpha
## [1] 2.04523
-t.alpha                                             # lower critical value
## [1] -2.04523

Karena \(t_{hitung}\) berada diluar \(interval~t_{table}\), maka tolak \(H_0\). Sehingga dapat kita simpulkan bahwa rata-rata pinjaman konsumen tidak sama dengan $150 dengan tingkat signifikansi 5%.

5.4 Lakukan pembuktian kebenaran asumsi dengan tingkat signifikansi 0.05, seperti di atas jika diketahui simpangan baku pinjaman adalah $ 85.

5.4.1 Lebih besar $ 150.

\[H_0 : \mu \leq \$150 \\ \] \[H_1 : \mu > \$150 \]

# Right-tailed one-sample test of H0: mu <= 150 against H1: mu > 150 with a
# KNOWN standard deviation. Because sigma is given rather than estimated, the
# statistic follows the standard normal distribution, so the critical value
# comes from qnorm(), not from the t distribution.
# The names `s`, `t` and `t.alpha` are kept to match the surrounding sections.
mu0 <- 150                                   # hypothesized mean
xbar <- mean(DataSample$LoanAmount)          # sample mean
s <- 85                                      # known (population) standard deviation
n <- nrow(DataSample)                        # sample size
t <- (xbar - mu0) / (s / sqrt(n))            # test statistic (z)
t
## [1] -1.546511
alpha <- .05                                 # significance level
t.alpha <- qnorm(1 - alpha)                  # right-tail critical value
t.alpha
## [1] 1.644854

Karena \(t_{hitung}<t_{table}\), maka terima \(H_0\). Sehingga dapat kita simpulkan bahwa rata-rata pinjaman konsumen lebih kecil sama dengan $150 dengan tingkat signifikansi 5%.

5.4.2 Lebih kecil $150

\[H_0 : \mu \geq \$150 \\ \] \[H_1 : \mu < \$150 \]

# Left-tailed one-sample test of H0: mu >= 150 against H1: mu < 150 with a
# KNOWN standard deviation. Because sigma is given rather than estimated, the
# statistic follows the standard normal distribution, so the critical value
# comes from qnorm(), not from the t distribution.
# The names `s`, `t` and `t.alpha` are kept to match the surrounding sections.
mu0 <- 150                                   # hypothesized mean
xbar <- mean(DataSample$LoanAmount)          # sample mean
s <- 85                                      # known (population) standard deviation
n <- nrow(DataSample)                        # sample size
t <- (xbar - mu0) / (s / sqrt(n))            # test statistic (z)
t
## [1] -1.546511
alpha <- .05                                 # significance level
t.alpha <- qnorm(1 - alpha)                  # upper-tail standard normal quantile
-t.alpha                                     # negated: left-tail critical value
## [1] -1.644854

Karena \(t_{hitung} > -t_{table}\), maka terima \(H_0\). Sehingga dapat kita simpulkan bahwa rata-rata pinjaman konsumen lebih besar sama dengan $150 dengan tingkat signifikansi 5%.

5.4.3 Sama dengan $ 150.

\[H_0 : \mu = \$150 \\ \] \[H_1 : \mu \neq \$150 \]

# Two-tailed one-sample test of H0: mu = 150 against H1: mu != 150 with a
# KNOWN standard deviation. Because sigma is given rather than estimated, the
# statistic follows the standard normal distribution, so the critical values
# come from qnorm(), not from the t distribution.
# The names `s`, `t` and `t.alpha` are kept to match the surrounding sections.
mu0 <- 150                                   # hypothesized mean
xbar <- mean(DataSample$LoanAmount)          # sample mean
s <- 85                                      # known (population) standard deviation
n <- nrow(DataSample)                        # sample size
t <- (xbar - mu0) / (s / sqrt(n))            # test statistic (z)
t
## [1] -1.546511
alpha <- .05                                 # total significance level
# A two-tailed test splits alpha evenly across both tails, so the critical
# value is the 1 - alpha/2 quantile (not 1 - alpha, which is one-tailed).
t.alpha <- qnorm(1 - alpha / 2)
t.alpha                                      # upper critical value
## [1] 1.959964
-t.alpha                                     # lower critical value
## [1] -1.959964

Karena \(t_{hitung}\) berada didalam \(interval~t_{table}\), maka terima \(H_0\). Sehingga dapat kita simpulkan bahwa rata-rata pinjaman konsumen sama dengan $150 dengan tingkat signifikansi 5%.