STATISTICAL COMPUTING

~ Midterm Exam ~


Contact: \(\downarrow\)
Email
Instagram https://www.instagram.com/boring.garr/
RPubs https://rpubs.com/garr/

Libraries

R

library(reticulate)
library(Rcpp)
use_condaenv("py10", required = TRUE)
library(tidyverse)
library(leaps)
library(ggplot2)
library(scales)
library(readr)
library(dplyr)
library(magrittr)
library(e1071)
library(funModeling)
library(Hmisc)
library(skimr)
library(plotly)
library(GGally)

Python

import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import RobustScaler
from matplotlib import markers
import numpy as np
import pandas as pd
import scipy.stats as ss
import seaborn as sns
import sweetviz as sv
import math

Data Set

The data set you will use in this midterm exam records consumers who took out a loan at a bank. It contains 614 observations and the following 13 attributes: Loan_ID, Gender, Married, Dependents, Education, Self_Employed, ApplicantIncome, CoapplicantIncome, LoanAmount, Loan_Amount_Term, Credit_History, Property_Area, and Loan_Status.
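As a quick sanity check on those dimensions, a minimal pandas sketch (the variable name df_check is only illustrative):

import pandas as pd

# Read the file and confirm the number of rows, columns, and attribute names
df_check = pd.read_csv("loan-train.csv")
print(df_check.shape)          # (614, 13)
print(list(df_check.columns))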

Task 1

Carry out the data preparation process with R and Python, using the following steps:

Import Data

R

data <- read.csv("loan-train.csv", sep = ",")
data

Python

dataphy = pd.read_csv("loan-train.csv")
dataphy
##       Loan_ID  Gender Married  ... Credit_History Property_Area Loan_Status
## 0    LP001002    Male      No  ...            1.0         Urban           Y
## 1    LP001003    Male     Yes  ...            1.0         Rural           N
## 2    LP001005    Male     Yes  ...            1.0         Urban           Y
## 3    LP001006    Male     Yes  ...            1.0         Urban           Y
## 4    LP001008    Male      No  ...            1.0         Urban           Y
## ..        ...     ...     ...  ...            ...           ...         ...
## 609  LP002978  Female      No  ...            1.0         Rural           Y
## 610  LP002979    Male     Yes  ...            1.0         Rural           Y
## 611  LP002983    Male     Yes  ...            1.0         Urban           Y
## 612  LP002984    Male     Yes  ...            1.0         Urban           Y
## 613  LP002990  Female      No  ...            0.0     Semiurban           N
## 
## [614 rows x 13 columns]

Handling Missing Data

R

sum(is.na(data))
## [1] 86
data.clean <- na.omit(data)      # rows containing any NA removed
apply(is.na(data), 2, which)     # which rows are NA, per column
## $Loan_ID
## integer(0)
## 
## $Gender
## integer(0)
## 
## $Married
## integer(0)
## 
## $Dependents
## integer(0)
## 
## $Education
## integer(0)
## 
## $Self_Employed
## integer(0)
## 
## $ApplicantIncome
## integer(0)
## 
## $CoapplicantIncome
## integer(0)
## 
## $LoanAmount
##  [1]   1  36  64  82  96 103 104 114 128 203 285 306 323 339 388 436 438 480 525
## [20] 551 552 606
## 
## $Loan_Amount_Term
##  [1]  20  37  45  46  74 113 166 198 224 233 336 368 422 424
## 
## $Credit_History
##  [1]  17  25  31  43  80  84  87  96 118 126 130 131 157 182 188 199 220 237 238
## [20] 260 261 280 310 314 318 319 324 349 364 378 393 396 412 445 450 452 461 474
## [39] 491 492 498 504 507 531 534 545 557 566 584 601
## 
## $Property_Area
## integer(0)
## 
## $Loan_Status
## integer(0)
data[is.na(data)] = 0            # replace the remaining NAs in the full data with 0
data

Python

dataphy.isna().sum()
## Loan_ID               0
## Gender               13
## Married               3
## Dependents           15
## Education             0
## Self_Employed        32
## ApplicantIncome       0
## CoapplicantIncome     0
## LoanAmount           22
## Loan_Amount_Term     14
## Credit_History       50
## Property_Area         0
## Loan_Status           0
## dtype: int64
dataphy['Gender'] = dataphy['Gender'].fillna(dataphy['Gender'].value_counts().index[0])    # most frequent value
dataphy['Married'] = dataphy['Married'].fillna(method='ffill')                             # carry the previous value forward
dataphy['Dependents'] = dataphy['Dependents'].fillna(dataphy['Dependents'].mode().iloc[0]) # mode
dataphy['Self_Employed'] = dataphy['Self_Employed'].fillna(method='bfill')                 # fill from the next observation
dataphy['LoanAmount'] = dataphy['LoanAmount'].fillna(dataphy['LoanAmount'].median())       # median
dataphy['Loan_Amount_Term'] = dataphy['Loan_Amount_Term'].fillna(dataphy['Loan_Amount_Term'].mean())   # mean
dataphy['Credit_History'] = dataphy['Credit_History'].interpolate(limit_direction="both")  # linear interpolation

dataphy.isna().sum()
## Loan_ID              0
## Gender               0
## Married              0
## Dependents           0
## Education            0
## Self_Employed        0
## ApplicantIncome      0
## CoapplicantIncome    0
## LoanAmount           0
## Loan_Amount_Term     0
## Credit_History       0
## Property_Area        0
## Loan_Status          0
## dtype: int64
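The per-column rules above could also be folded into a single loop; a minimal sketch of one such consolidation (median for numeric columns, mode for categorical ones), not the exact mix of rules used above:

# Alternative, generic imputation: median for numeric, most frequent value otherwise
for col in dataphy.columns:
  if dataphy[col].dtype == object:
    dataphy[col] = dataphy[col].fillna(dataphy[col].mode().iloc[0])
  else:
    dataphy[col] = dataphy[col].fillna(dataphy[col].median())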

Checking for Duplicate Data

R

nrow(unique(data.clean)) == nrow(data.clean)
## [1] TRUE

Python

dataphy[dataphy.duplicated(keep=False)]
## Empty DataFrame
## Columns: [Loan_ID, Gender, Married, Dependents, Education, Self_Employed, ApplicantIncome, CoapplicantIncome, LoanAmount, Loan_Amount_Term, Credit_History, Property_Area, Loan_Status]
## Index: []
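The same check can be reduced to a count of duplicated rows; a minimal sketch:

# Number of fully duplicated rows (0 here), then drop them just in case
print(dataphy.duplicated().sum())
dataphy = dataphy.drop_duplicates()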

Separating Categorical and Numerical Data

R

datakat <- select_if(data.clean, is.character)
datakat
datanum <- select_if(data.clean, is.numeric)
datanum

Python

datanum = dataphy.select_dtypes(include=[np.number])
datanum
##      ApplicantIncome  CoapplicantIncome  ...  Loan_Amount_Term  Credit_History
## 0               5849                0.0  ...             360.0             1.0
## 1               4583             1508.0  ...             360.0             1.0
## 2               3000                0.0  ...             360.0             1.0
## 3               2583             2358.0  ...             360.0             1.0
## 4               6000                0.0  ...             360.0             1.0
## ..               ...                ...  ...               ...             ...
## 609             2900                0.0  ...             360.0             1.0
## 610             4106                0.0  ...             180.0             1.0
## 611             8072              240.0  ...             360.0             1.0
## 612             7583                0.0  ...             360.0             1.0
## 613             4583                0.0  ...             360.0             0.0
## 
## [614 rows x 5 columns]
datakat = dataphy.select_dtypes(exclude=[np.number])
datakat
##       Loan_ID  Gender Married  ... Self_Employed Property_Area Loan_Status
## 0    LP001002    Male      No  ...            No         Urban           Y
## 1    LP001003    Male     Yes  ...            No         Rural           N
## 2    LP001005    Male     Yes  ...           Yes         Urban           Y
## 3    LP001006    Male     Yes  ...            No         Urban           Y
## 4    LP001008    Male      No  ...            No         Urban           Y
## ..        ...     ...     ...  ...           ...           ...         ...
## 609  LP002978  Female      No  ...            No         Rural           Y
## 610  LP002979    Male     Yes  ...            No         Rural           Y
## 611  LP002983    Male     Yes  ...            No         Urban           Y
## 612  LP002984    Male     Yes  ...            No         Urban           Y
## 613  LP002990  Female      No  ...           Yes     Semiurban           N
## 
## [614 rows x 8 columns]

Handling Numerical Data

R

# Standardisation (z-score) of every numeric column
standarisasi <- as.data.frame(lapply(datanum, scale))
standarisasi

# Min-max normalisation to the [0, 1] range
normal <- function(x){(x - min(x)) / (max(x) - min(x))}
normalisasi <- as.data.frame(lapply(datanum, normal))
normalisasi

# Robust scaling: centre on the median, divide by the IQR
robust <- function(x){(x - median(x)) / (quantile(x, probs = .75) - quantile(x, probs = .25))}
robust_skala <- as.data.frame(lapply(datanum, robust))
robust_skala

Python

scale = StandardScaler()
scale = scale.fit(datanum)
normalisasi = scale.transform(datanum)
for i in range(5):
  print(normalisasi[i])
## [ 0.07299082 -0.55448733 -0.21124125  0.27985054  0.44783036]
## [-0.13441195 -0.03873155 -0.21124125  0.27985054  0.44783036]
## [-0.39374734 -0.55448733 -0.94899647  0.27985054  0.44783036]
## [-0.46206247  0.2519796  -0.30643547  0.27985054  0.44783036]
## [ 0.09772844 -0.55448733 -0.05655064  0.27985054  0.44783036]
invers = scale.inverse_transform(normalisasi)
for i in range(5):
  print(invers[i])
## [5.849e+03 0.000e+00 1.280e+02 3.600e+02 1.000e+00]
## [4.583e+03 1.508e+03 1.280e+02 3.600e+02 1.000e+00]
## [3.0e+03 0.0e+00 6.6e+01 3.6e+02 1.0e+00]
## [2.583e+03 2.358e+03 1.200e+02 3.600e+02 1.000e+00]
## [6.00e+03 0.00e+00 1.41e+02 3.60e+02 1.00e+00]
rscale = RobustScaler()
rscale.fit(datanum)
## RobustScaler()
rscale.transform(datanum)
## array([[ 0.69802913, -0.51735771,  0.        ,  0.        ,  0.        ],
##        [ 0.26409597,  0.13907933,  0.        ,  0.        ,  0.        ],
##        [-0.27849186, -0.51735771, -0.96124031,  0.        ,  0.        ],
##        ...,
##        [ 1.45998286, -0.41288497,  1.9379845 ,  0.        ,  0.        ],
##        [ 1.29237361, -0.51735771,  0.91472868,  0.        ,  0.        ],
##        [ 0.26409597, -0.51735771,  0.07751938,  0.        , -1.        ]])
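The R side also applies min-max normalisation; for parity, a MinMaxScaler sketch (an addition, not part of the original workflow):

from sklearn.preprocessing import MinMaxScaler

# Rescale every numeric column to the [0, 1] range
mmscale = MinMaxScaler()
minmax = mmscale.fit_transform(datanum)
for i in range(5):
  print(minmax[i])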

Handling Outliers

R

# Flag values that fall outside the 1.5*IQR fences
outliers <- function(x){
  Q1 <- quantile(x, probs = .25)
  Q3 <- quantile(x, probs = .75)
  iqr <- Q3 - Q1

  upper_limit <- Q3 + (iqr * 1.5)
  lower_limit <- Q1 - (iqr * 1.5)

  x > upper_limit | x < lower_limit
}

Dt_pen <- data

outlier1 <- subset(Dt_pen, outliers(Dt_pen$LoanAmount))
outlier2 <- subset(Dt_pen, outliers(Dt_pen$ApplicantIncome))
outlier3 <- subset(Dt_pen, outliers(Dt_pen$CoapplicantIncome))

# Rows flagged as outliers in any of the three columns, without duplicates
data_pencilan <- rbind(outlier1, outlier2, outlier3) %>% distinct()

data_pencilan

Python

q25, q75 = np.percentile(datanum['ApplicantIncome'], 25), np.percentile(datanum['ApplicantIncome'], 75)
iqr = q75-q25

print(f'Percentile ApplicantIncome: 25th={np.round(q25)}, 75th={np.round(q75)}, IQR={np.round(iqr)}')
## Percentile ApplicantIncome: 25th=2878.0, 75th=5795.0, IQR=2918.0
cut = iqr*1.5
lower, upper = q25 - cut, q75 + cut

outliers = [x for x in datanum['ApplicantIncome'] if x < lower or x > upper]
print('Data Pencilan ApplicantIncome: %d' % len(outliers))
## Data Pencilan ApplicantIncome: 50
datanum.drop(datanum[ (datanum['ApplicantIncome'] > upper) | (datanum['ApplicantIncome'] < lower) ].index , inplace=True)
q25, q75 = np.percentile(datanum['CoapplicantIncome'], 25), np.percentile(datanum['CoapplicantIncome'], 75)
iqr = q75-q25

print(f'Percentile CoapplicantIncome: 25th={np.round(q25)}, 75th={np.round(q75)}, IQR={np.round(iqr)}')
## Percentile CoapplicantIncome: 25th=0.0, 75th=2337.0, IQR=2337.0
cut = iqr*1.5
lower, upper = q25 - cut, q75 + cut

outliers = [x for x in datanum['CoapplicantIncome'] if x < lower or x > upper]
print('Data Pencilan CoapplicantIncome: %d' % len(outliers))
## Data Pencilan CoapplicantIncome: 16
datanum.drop(datanum[ (datanum['CoapplicantIncome'] > upper) | (datanum['CoapplicantIncome'] < lower) ].index , inplace=True)
q25, q75 = np.percentile(datanum['LoanAmount'], 25), np.percentile(datanum['LoanAmount'], 75)
iqr = q75-q25

print(f'Percentile LoanAmount: 25th={np.round(q25)}, 75th={np.round(q75)}, IQR={np.round(iqr)}')
## Percentile LoanAmount: 25th=100.0, 75th=155.0, IQR=55.0
cut = iqr*1.5
lower, upper = q25 - cut, q75 + cut

outliers = [x for x in datanum['LoanAmount'] if x < lower or x > upper]
print('Data Pencilan LoanAmount: %d' % len(outliers))
## Data Pencilan LoanAmount: 28
datanum.drop(datanum[ (datanum['LoanAmount'] > upper) | (datanum['LoanAmount'] < lower) ].index , inplace=True)
q25, q75 = np.percentile(datanum['Loan_Amount_Term'], 25), np.percentile(datanum['Loan_Amount_Term'], 75)
iqr = q75-q25

print(f'Percentile Loan_Amount_Term: 25th={np.round(q25)}, 75th={np.round(q75)}, IQR={np.round(iqr)}')
## Percentile Loan_Amount_Term: 25th=360.0, 75th=360.0, IQR=0.0
cut = iqr*1.5
lower, upper = q25 - cut, q75 + cut

outliers = [x for x in datanum['Loan_Amount_Term'] if x < lower or x > upper]
print('Data Pencilan Loan_Amount_Term: %d' % len(outliers))
## Data Pencilan Loan_Amount_Term: 89
datanum.drop(datanum[ (datanum['Loan_Amount_Term'] > upper) | (datanum['Loan_Amount_Term'] < lower) ].index , inplace=True)
q25, q75 = np.percentile(datanum['Credit_History'], 25), np.percentile(datanum['Credit_History'], 75)
iqr = q75-q25

print(f'Percentile Credit_History: 25th={np.round(q25)}, 75th={np.round(q75)}, IQR={np.round(iqr)}')
## Percentile Credit_History: 25th=1.0, 75th=1.0, IQR=0.0
cut = iqr*1.5
lower, upper = q25 - cut, q75 + cut

outliers = [x for x in datanum['Credit_History'] if x < lower or x > upper]
print('Data Pencilan Credit_History: %d' % len(outliers))
## Data Pencilan Credit_History: 69
datanum.drop(datanum[ (datanum['Credit_History'] > upper) | (datanum['Credit_History'] < lower) ].index , inplace=True)
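The five blocks above repeat the same 1.5*IQR rule; a minimal loop sketch that applies it to each numeric column in turn (counts differ slightly here because the rows above have already been dropped):

# Apply the 1.5*IQR fence to every column of datanum
for col in datanum.columns:
  q25, q75 = np.percentile(datanum[col], 25), np.percentile(datanum[col], 75)
  cut = (q75 - q25) * 1.5
  lower, upper = q25 - cut, q75 + cut
  mask = (datanum[col] < lower) | (datanum[col] > upper)
  print(f'Outliers in {col}: {mask.sum()}')
  datanum = datanum[~mask]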

Handling Categorical Data

R

datakat %>% summarise_all(n_distinct)
GenderLabel <- factor(datakat$Gender, labels = c(0, 1, 2))
MarriedLabel <- factor(datakat$Married, labels = c(0, 1, 2))
DependentsLabel <- factor(datakat$Dependents, labels = c(0, 1, 2, 3, 4))
EducationLabel <- factor(datakat$Education, labels = c(0, 1))
Self_EmployedLabel <- factor(datakat$Self_Employed, labels = c(0, 1, 2))
Property_AreaLabel <- factor(datakat$Property_Area, labels = c(0, 1, 2))
Loan_StatusLabel <- factor(datakat$Loan_Status, labels = c(0, 1))

datakat_labeled <- data.frame("ID" = datakat$Loan_ID, GenderLabel, MarriedLabel, DependentsLabel, EducationLabel, Self_EmployedLabel, Property_AreaLabel, Loan_StatusLabel)

datakat_labeled

Python

datakat.info()
## <class 'pandas.core.frame.DataFrame'>
## RangeIndex: 614 entries, 0 to 613
## Data columns (total 8 columns):
##  #   Column         Non-Null Count  Dtype 
## ---  ------         --------------  ----- 
##  0   Loan_ID        614 non-null    object
##  1   Gender         614 non-null    object
##  2   Married        614 non-null    object
##  3   Dependents     614 non-null    object
##  4   Education      614 non-null    object
##  5   Self_Employed  614 non-null    object
##  6   Property_Area  614 non-null    object
##  7   Loan_Status    614 non-null    object
## dtypes: object(8)
## memory usage: 38.5+ KB
label = datakat.apply(LabelEncoder().fit_transform)
label.head()
##    Loan_ID  Gender  Married  ...  Self_Employed  Property_Area  Loan_Status
## 0        0       1        0  ...              0              2            1
## 1        1       1        1  ...              0              0            0
## 2        2       1        1  ...              1              2            1
## 3        3       1        1  ...              0              2            1
## 4        4       1        0  ...              0              2            1
## 
## [5 rows x 8 columns]
dtkat = datakat[['Gender','Married','Dependents','Education','Self_Employed','Property_Area','Loan_Status']]
dtkat.head()
##   Gender Married Dependents  ... Self_Employed Property_Area Loan_Status
## 0   Male      No          0  ...            No         Urban           Y
## 1   Male     Yes          1  ...            No         Rural           N
## 2   Male     Yes          0  ...           Yes         Urban           Y
## 3   Male     Yes          0  ...            No         Urban           Y
## 4   Male      No          0  ...            No         Urban           Y
## 
## [5 rows x 7 columns]
catdt = dtkat.copy()
catdt = pd.get_dummies(catdt, columns=['Gender'], prefix = ['Gender'])

print(catdt)
##     Married Dependents     Education  ... Loan_Status Gender_Female Gender_Male
## 0        No          0      Graduate  ...           Y             0           1
## 1       Yes          1      Graduate  ...           N             0           1
## 2       Yes          0      Graduate  ...           Y             0           1
## 3       Yes          0  Not Graduate  ...           Y             0           1
## 4        No          0      Graduate  ...           Y             0           1
## ..      ...        ...           ...  ...         ...           ...         ...
## 609      No          0      Graduate  ...           Y             1           0
## 610     Yes         3+      Graduate  ...           Y             0           1
## 611     Yes          1      Graduate  ...           Y             0           1
## 612     Yes          2      Graduate  ...           Y             0           1
## 613      No          0      Graduate  ...           N             1           0
## 
## [614 rows x 8 columns]
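get_dummies can also encode all of the categorical predictors in one call; a minimal sketch, assuming dtkat from above (catall is only an illustrative name):

# One-hot encode every categorical column except the target Loan_Status
catall = pd.get_dummies(dtkat.drop(columns=['Loan_Status']),
                        columns=['Gender', 'Married', 'Dependents',
                                 'Education', 'Self_Employed', 'Property_Area'])
print(catall.shape)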

Task 2

Carry out the data visualization process using R and Python with the following steps:

Univariate Visualization

Categorical data

R

ggplot(data, aes(x = Gender)) +                
  geom_bar(fill = "cornflowerblue", 
           color= "azure4") +                         
  theme_minimal() +                                 
  labs(x = "gender",
       title = "Gender")  

ggplot(data, aes(x = Education)) +                
  geom_bar(fill = "cornflowerblue", 
           color= "azure4") +                         
  theme_minimal() +                                 
  labs(x = "Education",
       title = "Education")  

ggplot(data, aes(x = Married)) +                
  geom_bar(fill = "cornflowerblue", 
           color= "azure4") +                         
  theme_minimal() +                                 
  labs(x = "Married",
       title = "Married")  

Python

datakat['Gender'].value_counts()
## Male      502
## Female    112
## Name: Gender, dtype: int64
datakat['Gender'].value_counts().plot(kind='bar')

datakat['Education'].value_counts()
## Graduate        480
## Not Graduate    134
## Name: Education, dtype: int64
datakat['Education'].value_counts().plot(kind='bar')

datakat['Married'].value_counts()
## Yes    400
## No     214
## Name: Married, dtype: int64
datakat['Married'].value_counts().plot(kind='bar')
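seaborn produces the same univariate bar charts through countplot; a minimal sketch for Gender:

# Frequency bar chart of Gender with seaborn
plt.figure(figsize=(6, 4))
sns.countplot(data=datakat, x='Gender')
plt.show()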

Numerical data

R

ggplot(data, 
       aes(x = ApplicantIncome, 
           y= ..count.. / sum(..count..))) +
  geom_histogram(fill = "cornflowerblue", 
                 color = "white", 
                 binwidth = 5) +
  theme_minimal() +                                  
  labs(title="ApplicantIncome", 
       y = "Percent",
       x = "ApplicantIncome") +
  scale_y_continuous(labels = percent)

ggplot(data, 
       aes(x = CoapplicantIncome, 
           y= ..count.. / sum(..count..))) +
  geom_histogram(fill = "cornflowerblue", 
                 color = "white", 
                 binwidth = 5) +
  theme_minimal() +                                  
  labs(title="CoapplicantIncome", 
       y = "Percent",
       x = "CoapplicantIncome") +
  scale_y_continuous(labels = percent)

ggplot(data, 
       aes(x = LoanAmount, 
           y= ..count.. / sum(..count..))) +
  geom_histogram(fill = "cornflowerblue", 
                 color = "white", 
                 binwidth = 5) +
  theme_minimal() +                                  
  labs(title="LoanAmount", 
       y = "Percent",
       x = "LoanAmount") +
  scale_y_continuous(labels = percent)

Python

plt.figure(figsize=(8,6))
sns.set_theme(style="darkgrid")
sns.histplot(data=datanum, x="ApplicantIncome", bins=20, kde=True)
plt.show()

plt.figure(figsize=(8,6))
sns.set_theme(style="darkgrid")
sns.histplot(data=datanum, x="CoapplicantIncome", bins=20, kde=True)
plt.show()

plt.figure(figsize=(8,6))
sns.set_theme(style="darkgrid")
sns.histplot(data=datanum, x="LoanAmount", bins=20, kde=True)
plt.show()
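The three histograms can also be drawn in one loop; a minimal sketch:

# Same histogram for each income/loan column
for col in ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount']:
  plt.figure(figsize=(8,6))
  sns.histplot(data=datanum, x=col, bins=20, kde=True)
  plt.show()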

Bivariate Visualization

R

Categorical vs categorical

ggplot(data, aes(x = Gender, fill = Education)) +
  theme_minimal() +                                  
  geom_bar(position = position_dodge(preserve = "single"))

ggplot(data, aes(x = Married, fill = Education)) +
  theme_minimal() +                                  
  geom_bar(position = position_dodge(preserve = "single"))

ggplot(data, aes(x = Education, fill = Property_Area)) +
  theme_minimal() +                                  
  geom_bar(position = position_dodge(preserve = "single"))

Numeric vs numeric

ggplot(data, 
       aes(x = LoanAmount, 
           y = ApplicantIncome)) +
  geom_point(color="cornflowerblue", 
             size = 2, 
             alpha=.8) +
  scale_y_continuous(label = scales::dollar, 
                     limits = c(0, 10000)) +
  scale_x_continuous(breaks = seq(0, 60, 10), 
                     limits=c(0, 60)) +
  theme_minimal() +                                 
  labs(x = "LoanAmount",
       y = "ApplicantIncome",
       title = "",
       subtitle = "")

ggplot(data, 
       aes(x = LoanAmount, 
           y = CoapplicantIncome)) +
  geom_point(color="cornflowerblue", 
             size = 2, 
             alpha=.8) +
  scale_y_continuous(label = scales::dollar, 
                     limits = c(0, 10000)) +
  scale_x_continuous(breaks = seq(0, 60, 10), 
                     limits=c(0, 60)) +
  theme_minimal() +                                
  labs(x = " LoanAmount",
       y = "CoapplicantIncome",
       title = "",
       subtitle = "")

ggplot(data, 
       aes(x = ApplicantIncome, 
           y = CoapplicantIncome)) +
  geom_point(color="cornflowerblue", 
             size = 2, 
             alpha=.8) +
  scale_y_continuous(label = scales::dollar, 
                     limits = c(0, 10000)) +
  scale_x_continuous(breaks = seq(0, 60, 10), 
                     limits=c(0, 60)) +
  theme_minimal() +                                 
  labs(x = "ApplicantIncome",
       y = "CoapplicantIncome",
       title = "",
       subtitle = "")

ggplot(data, 
       aes(x = LoanAmount, 
           y = Loan_Amount_Term)) +
  geom_point(color="cornflowerblue", 
             size = 2, 
             alpha=.8) +
  scale_y_continuous(label = scales::dollar, 
                     limits = c(0, 10000)) +
  scale_x_continuous(breaks = seq(0, 60, 10), 
                     limits=c(0, 60)) +
  theme_minimal() +                                  
  labs(x = "LoanAmount",
       y = "Loan_Amount_Term",
       title = "",
       subtitle = "")

Python

Numeric vs numeric

datplot = datanum[['ApplicantIncome','CoapplicantIncome']]
datplot.plot.line()

plt.figure(figsize=(8,6))
sns.set_theme(style="darkgrid")
sns.scatterplot(data=datanum, x="ApplicantIncome", y="CoapplicantIncome")
plt.show()

sns.set_theme(style = "darkgrid")
sns.violinplot(x = "Property_Area",
y = datanum["LoanAmount"],
hue = "Gender",
data=datakat)

plt.show()

sns.set_theme(style = "darkgrid")
sns.boxplot(x = "Dependents",
y = datanum["LoanAmount"],
hue = "Married",
data=datakat)

plt.show()
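Because datanum had outlier rows removed while datakat still holds all 614 rows, the two calls above rely on index alignment. A sketch that joins the needed columns into one frame first (dtplot is only an illustrative name):

# Join LoanAmount onto the categorical columns by row index, keeping matching rows only
dtplot = datakat.join(datanum['LoanAmount'], how='inner')

sns.set_theme(style = "darkgrid")
sns.violinplot(data = dtplot, x = "Property_Area", y = "LoanAmount", hue = "Gender")
plt.show()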

Multivariate Visualization

R

ggplot(data, 
       aes(x = LoanAmount, 
           y = ApplicantIncome, 
           color = Education, 
           shape = Gender)) +
  geom_point(size = 3, alpha = .6) +
  theme_minimal() +
  labs(title = "")

Python

x = datakat['Gender'].loc[0:361]
datanum['gender'] = x
dtnum = datanum[['ApplicantIncome','CoapplicantIncome','LoanAmount','gender']]
sns.pairplot(dtnum,
             kind = "scatter",
             hue = "gender",
             markers = ["o", "s"],
             palette = "Set2")

Task 3

Carry out descriptive data analysis using R and Python with the following steps:

Qualitative

Univariate categorical

R

apply(is.na(data),2, which)
## integer(0)
df.data <- na.omit(data)
head(df.data, 3)
kat <- table(df.data$Gender)
kat
## 
##        Female   Male 
##     13    112    489
prop.table(table(df.data$Gender))
## 
##                Female       Male 
## 0.02117264 0.18241042 0.79641694

Python

C_Gender = dataphy.Gender.value_counts()
C_Married = dataphy.Married.value_counts()
C_Dep = dataphy.Dependents.value_counts()
C_Edu = dataphy.Education.value_counts()
C_SelfEmplyd = dataphy.Self_Employed.value_counts()
C_LAmount = dataphy.Loan_Amount_Term.value_counts()
C_CrdtHis = dataphy.Credit_History.value_counts()
C_PropertyArea = dataphy.Property_Area.value_counts()
C_LStat = dataphy.Loan_Status.value_counts()

print(C_Gender,
 C_Married,
 C_Dep,
 C_Edu,
 C_SelfEmplyd,
 C_LAmount,
 C_CrdtHis,
 C_PropertyArea,
 C_LStat,
 sep = '\n')
## Male      502
## Female    112
## Name: Gender, dtype: int64
## Yes    400
## No     214
## Name: Married, dtype: int64
## 0     360
## 1     102
## 2     101
## 3+     51
## Name: Dependents, dtype: int64
## Graduate        480
## Not Graduate    134
## Name: Education, dtype: int64
## No     528
## Yes     86
## Name: Self_Employed, dtype: int64
## 360.0    512
## 180.0     44
## 480.0     15
## 342.0     14
## 300.0     13
## 240.0      4
## 84.0       4
## 120.0      3
## 60.0       2
## 36.0       2
## 12.0       1
## Name: Loan_Amount_Term, dtype: int64
## 1.000000    508
## 0.000000     92
## 0.500000     12
## 0.333333      1
## 0.666667      1
## Name: Credit_History, dtype: int64
## Semiurban    233
## Urban        202
## Rural        179
## Name: Property_Area, dtype: int64
## Y    422
## N    192
## Name: Loan_Status, dtype: int64
P_Gender = dataphy.Gender.value_counts() / len(dataphy)
P_Married = dataphy.Married.value_counts() / len(dataphy)
P_Dep = dataphy.Dependents.value_counts() / len(dataphy)
P_Edu = dataphy.Education.value_counts() / len(dataphy)
P_SelfEmplyd = dataphy.Self_Employed.value_counts() / len(dataphy)
P_LAmount = dataphy.Loan_Amount_Term.value_counts() / len(dataphy)
P_CrdtHis = dataphy.Credit_History.value_counts() / len(dataphy)
P_PropertyArea = dataphy.Property_Area.value_counts() / len(dataphy)
P_LStat = dataphy.Loan_Status.value_counts() / len(dataphy)

(P_Gender,
 P_Married,
 P_Dep,
 P_Edu,
 P_SelfEmplyd,
 P_LAmount,
 P_CrdtHis,
 P_PropertyArea,
 P_LStat)
## (Male      0.81759
## Female    0.18241
## Name: Gender, dtype: float64, Yes    0.651466
## No     0.348534
## Name: Married, dtype: float64, 0     0.586319
## 1     0.166124
## 2     0.164495
## 3+    0.083062
## Name: Dependents, dtype: float64, Graduate        0.781759
## Not Graduate    0.218241
## Name: Education, dtype: float64, No     0.859935
## Yes    0.140065
## Name: Self_Employed, dtype: float64, 360.0    0.833876
## 180.0    0.071661
## 480.0    0.024430
## 342.0    0.022801
## 300.0    0.021173
## 240.0    0.006515
## 84.0     0.006515
## 120.0    0.004886
## 60.0     0.003257
## 36.0     0.003257
## 12.0     0.001629
## Name: Loan_Amount_Term, dtype: float64, 1.000000    0.827362
## 0.000000    0.149837
## 0.500000    0.019544
## 0.333333    0.001629
## 0.666667    0.001629
## Name: Credit_History, dtype: float64, Semiurban    0.379479
## Urban        0.328990
## Rural        0.291531
## Name: Property_Area, dtype: float64, Y    0.687296
## N    0.312704
## Name: Loan_Status, dtype: float64)
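The proportions can also be requested directly from value_counts; a minimal sketch:

# value_counts(normalize=True) returns relative frequencies
print(dataphy['Gender'].value_counts(normalize=True))
print(dataphy['Property_Area'].value_counts(normalize=True))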

Bivariate categorical

R

kat2 <- df.data %>%
  select(Gender, Education) %>%
  table()
kat2
##         Education
## Gender   Graduate Not Graduate
##                12            1
##   Female       92           20
##   Male        376          113

Python

pd.crosstab(dataphy.Gender,dataphy.Property_Area)
## Property_Area  Rural  Semiurban  Urban
## Gender                                
## Female            24         55     33
## Male             155        178    169
pd.crosstab(dataphy.Self_Employed,dataphy.Loan_Amount_Term)
## Loan_Amount_Term  12.0   36.0   60.0   84.0   ...  300.0  342.0  360.0  480.0
## Self_Employed                                 ...                            
## No                    1      2      1      3  ...     10     12    442     14
## Yes                   0      0      1      1  ...      3      2     70      1
## 
## [2 rows x 11 columns]
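Row-wise proportions of the same cross-tabulation are available through the normalize argument; a minimal sketch:

# Share of each Property_Area within each Gender
print(pd.crosstab(dataphy.Gender, dataphy.Property_Area, normalize='index'))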

Multivariate categorical

R

Cat3 <- df.data %>%
  select(Gender, Education, Loan_Status) %>%
  ftable()
Cat3
##                     Loan_Status   N   Y
## Gender Education                       
##        Graduate                   4   8
##        Not Graduate               1   0
## Female Graduate                  31  61
##        Not Graduate               6  14
## Male   Graduate                 105 271
##        Not Graduate              45  68

Python

df = dataphy
pd.crosstab([df.Gender, df.Married], [df.Education, df.Property_Area, df.Loan_Amount_Term])
## Education        Graduate                          ... Not Graduate                        
## Property_Area       Rural                          ...        Urban                        
## Loan_Amount_Term    84.0  180.0 300.0 342.0 360.0  ...        240.0 300.0 342.0 360.0 480.0
## Gender Married                                     ...                                     
## Female No               0     0     0     0    13  ...            0     0     2     2     0
##        Yes              0     0     0     0     3  ...            0     0     0     0     0
## Male   No               0     0     0     1    30  ...            1     0     0     3     1
##        Yes              2     8     3     1    68  ...            0     1     1    17     1
## 
## [4 rows x 43 columns]

Quantitative

Univariate numeric

R

Quan.data <- df.data %>%
  select_if(is.numeric)
names(Quan.data)
## [1] "ApplicantIncome"   "CoapplicantIncome" "LoanAmount"       
## [4] "Loan_Amount_Term"  "Credit_History"
mean(Quan.data$LoanAmount) 
## [1] 141.1661
quantile(Quan.data$LoanAmount)
##     0%    25%    50%    75%   100% 
##   0.00  98.00 125.00 164.75 700.00
median(Quan.data$LoanAmount)
## [1] 125
mode(Quan.data$LoanAmount)   # base R mode() returns the storage mode, not the statistical mode
## [1] "numeric"
summary(Quan.data)
##  ApplicantIncome CoapplicantIncome   LoanAmount    Loan_Amount_Term
##  Min.   :  150   Min.   :    0     Min.   :  0.0   Min.   :  0.0   
##  1st Qu.: 2878   1st Qu.:    0     1st Qu.: 98.0   1st Qu.:360.0   
##  Median : 3812   Median : 1188     Median :125.0   Median :360.0   
##  Mean   : 5403   Mean   : 1621     Mean   :141.2   Mean   :334.2   
##  3rd Qu.: 5795   3rd Qu.: 2297     3rd Qu.:164.8   3rd Qu.:360.0   
##  Max.   :81000   Max.   :41667     Max.   :700.0   Max.   :480.0   
##  Credit_History  
##  Min.   :0.0000  
##  1st Qu.:1.0000  
##  Median :1.0000  
##  Mean   :0.7736  
##  3rd Qu.:1.0000  
##  Max.   :1.0000
var(Quan.data$LoanAmount)
## [1] 7804.067
IQR(Quan.data$LoanAmount) 
## [1] 66.75
mad(Quan.data$Credit_History)
## [1] 0
sd(Quan.data$Credit_History)
## [1] 0.4188319
skewness(Quan.data$LoanAmount)
## [1] 2.350745
kurtosis(Quan.data$LoanAmount)
## [1] 9.070636

Python

data_num = dataphy.select_dtypes(include=['float64','int64'])  

data_num.describe()     
##        ApplicantIncome  CoapplicantIncome  ...  Loan_Amount_Term  Credit_History
## count       614.000000         614.000000  ...        614.000000      614.000000
## mean       5403.459283        1621.245798  ...        342.000000        0.838762
## std        6109.041673        2926.248369  ...         64.372489        0.360336
## min         150.000000           0.000000  ...         12.000000        0.000000
## 25%        2877.500000           0.000000  ...        360.000000        1.000000
## 50%        3812.500000        1188.500000  ...        360.000000        1.000000
## 75%        5795.000000        2297.250000  ...        360.000000        1.000000
## max       81000.000000       41667.000000  ...        480.000000        1.000000
## 
## [8 rows x 5 columns]
  • Scale

Variance

data_num.var()
## ApplicantIncome      3.732039e+07
## CoapplicantIncome    8.562930e+06
## LoanAmount           7.074027e+03
## Loan_Amount_Term     4.143817e+03
## Credit_History       1.298418e-01
## dtype: float64

Standard deviation

data_num.std()
## ApplicantIncome      6109.041673
## CoapplicantIncome    2926.248369
## LoanAmount             84.107233
## Loan_Amount_Term       64.372489
## Credit_History          0.360336
## dtype: float64

MAD

data_num.apply(ss.median_abs_deviation)
## ApplicantIncome      1229.5
## CoapplicantIncome    1188.5
## LoanAmount             30.0
## Loan_Amount_Term        0.0
## Credit_History          0.0
## dtype: float64

IQR

data_num.apply(ss.iqr)
## ApplicantIncome      2917.50
## CoapplicantIncome    2297.25
## LoanAmount             64.50
## Loan_Amount_Term        0.00
## Credit_History          0.00
## dtype: float64
  • Skewness
data_num.skew()
## ApplicantIncome      6.539513
## CoapplicantIncome    7.491531
## LoanAmount           2.743053
## Loan_Amount_Term    -2.389680
## Credit_History      -1.845586
## dtype: float64
  • Kurtosis
data_num.kurt()
## ApplicantIncome      60.540676
## CoapplicantIncome    84.956384
## LoanAmount           10.936695
## Loan_Amount_Term      6.897995
## Credit_History        1.490150
## dtype: float64
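Note that base R's mode() in the block above reports the storage mode ("numeric"), not the most frequent value; in pandas the statistical mode is available directly. A minimal sketch:

# Most frequent LoanAmount value(s); .mode() can return several values on ties
print(data_num['LoanAmount'].mode())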

Bivariate numeric

R

cov(Quan.data$LoanAmount,Quan.data$Loan_Amount_Term)
## [1] 424.8604
cor(Quan.data$LoanAmount,Quan.data$Loan_Amount_Term)
## [1] 0.05851929
zscore <- (Quan.data$LoanAmount - mean(Quan.data$LoanAmount)) / sd(Quan.data$LoanAmount)
zscore
##   [1] -1.597975056 -0.149038146 -0.850866962 -0.239596703 -0.001880491
##   [6]  1.424416780 -0.522592193  0.190556442  0.303754638  2.352641988
##  [11] -0.805587683 -0.364114719  0.665988866 -0.307515621 -1.405538123
##  [16] -0.182997605 -0.465993095 -0.737668766 -0.092439048 -0.296195801
##  [21] -0.420713817  1.967768121 -0.284875981 -0.330155260  0.111317705
##  [26]  0.564110489 -0.216957064 -0.352794899 -1.201781370 -0.239596703
##  [31]  0.677308685 -0.760308405 -0.398074178 -0.307515621  2.024367219
##  [36] -1.597975056 -0.465993095  0.032078968  0.484871752 -0.352794899
##  [41] -0.692389487 -1.065943534 -0.748988585 -0.081119228 -0.511272374
##  [46] -0.601830931 -1.099902993  0.032078968 -0.239596703  0.032078968
##  [51] -0.465993095 -0.239596703 -0.330155260 -0.081119228  1.639493352
##  [56] -0.499952554 -0.511272374 -0.069799409  0.439592474  0.032078968
##  [61] -0.239596703 -0.477312915  0.269795179 -1.597975056 -0.284875981
##  [66]  1.322538403 -0.171677785  1.933808662 -0.182997605 -0.058479589
##  [71]  0.349033917 -0.499952554 -0.681069668 -0.522592193  0.518831211
##  [76] -0.318835440  0.394313195 -0.352794899  0.439592474 -0.126398507
##  [81] -0.341475079 -1.597975056  0.292434819  1.401777140 -1.031984076
##  [86] -0.058479589 -0.477312915 -0.420713817  0.779187062  0.382993376
##  [91] -0.115078687  0.530151030 -0.681069668 -0.216957064 -1.314979566
##  [96] -1.597975056 -0.047159770 -1.031984076 -0.296195801 -0.115078687
## [101] -0.092439048  0.111317705 -1.597975056 -1.597975056  0.213196081
## [106] -0.465993095  0.948984356 -0.239596703  0.847105980 -0.533912013
## [111] -0.058479589 -0.024520130  0.122637524 -1.597975056 -0.262236342
## [116]  0.496191572  0.145277164 -0.635790389  0.382993376  1.333858223
## [121]  0.439592474 -1.099902993 -0.047159770 -0.681069668  0.598069948
## [126] -0.545231832  2.590358199 -1.597975056  0.213196081  0.462232113
## [131]  5.759907690 -0.760308405 -0.805587683 -1.314979566 -0.443353456
## [136]  1.684772631 -0.647110209 -0.601830931  1.141421289 -0.137718327
## [141]  0.496191572  0.303754638  0.382993376 -0.216957064  0.518831211
## [146] -0.465993095 -0.805587683 -1.258380468  0.948984356 -0.182997605
## [151] -0.262236342  0.122637524  1.164060929 -0.318835440 -1.031984076
## [156]  5.193916710  0.213196081  0.518831211 -0.239596703  1.288578944
## [161] -0.488632734  1.514975336 -0.228276883  0.190556442 -0.748988585
## [166]  0.462232113 -0.330155260 -0.137718327 -0.884826421  0.665988866
## [171] -0.522592193  6.325898671 -0.681069668  0.518831211 -0.613150750
## [176] -0.284875981 -0.454673276  4.005335651 -0.284875981 -0.443353456
## [181]  0.439592474 -0.839547142 -0.771628225  1.345178042 -0.375434538
## [186] -0.239596703 -0.850866962 -0.941425519  0.303754638  0.530151030
## [191] -1.054623715  0.258475360  0.213196081 -0.737668766 -0.239596703
## [196]  0.326394277  0.518831211 -0.239596703 -0.318835440 -0.658430029
## [201] -0.579191291  0.281114999 -1.597975056 -0.069799409 -0.194317425
## [206] -0.239596703 -0.692389487 -0.975384978 -0.930105699 -0.160357966
## [211]  0.824466340 -0.149038146  1.118781650 -0.126398507 -0.047159770
## [216] -0.465993095 -0.069799409 -0.115078687 -0.782948044 -0.160357966
## [221] -0.918785880 -0.284875981  0.032078968  0.382993376 -0.149038146
## [226]  0.326394277 -0.035839950  0.779187062  0.190556442  0.665988866
## [231] -0.420713817 -1.122542633 -0.239596703  1.571574435 -0.013200311
## [236]  0.326394277  1.288578944 -0.216957064 -0.330155260 -0.511272374
## [241] -0.239596703 -0.013200311  0.156596983 -0.375434538 -0.205637244
## [246] -0.239596703 -0.330155260 -0.047159770 -0.205637244 -0.579191291
## [251]  0.677308685 -0.035839950 -0.420713817  1.560254615  0.575430309
## [256]  1.288578944 -0.296195801 -0.533912013  1.843250105 -0.149038146
## [261]  2.137565415 -0.081119228  0.156596983 -0.239596703 -0.149038146
## [266]  0.111317705  0.099997885  0.213196081 -0.069799409 -0.579191291
## [271] -1.258380468 -0.058479589 -0.171677785  0.099997885 -0.579191291
## [276] -0.296195801  0.745227603 -0.692389487  3.337466294 -0.194317425
## [281]  0.190556442 -0.330155260 -0.715029127 -0.986704797 -1.597975056
## [286] -0.590511111 -0.477312915 -0.239596703 -0.296195801  0.518831211
## [291] -0.024520130 -0.160357966 -0.081119228  0.020759148  0.349033917
## [296] -0.352794899  0.665988866 -0.069799409  0.111317705 -0.318835440
## [301] -0.545231832 -0.409393997 -0.103758868 -0.511272374 -0.013200311
## [306] -1.597975056 -0.069799409 -0.420713817  3.835538356  0.496191572
## [311] -0.647110209 -0.341475079 -0.964065158  0.032078968  0.201876262
## [316] -0.341475079 -0.239596703 -0.601830931 -0.330155260  0.156596983
## [321] -0.296195801 -0.194317425 -1.597975056 -0.103758868  1.797970827
## [326]  2.658277117 -0.126398507  0.484871752 -0.352794899 -0.839547142
## [331] -0.273556162 -0.488632734 -0.794267864  3.948736553  0.462232113
## [336] -0.805587683  0.213196081  0.394313195 -1.597975056 -0.794267864
## [341]  0.360353736 -1.077263354  0.190556442 -0.760308405 -0.182997605
## [346]  0.213196081  0.122637524 -0.171677785  1.333858223  0.518831211
## [351]  0.982943815  1.888529384 -0.522592193 -0.409393997 -0.126398507
## [356] -0.284875981  0.269795179 -0.839547142 -0.465993095  0.665988866
## [361] -0.681069668  1.073502372 -0.126398507 -0.522592193 -0.001880491
## [366] -0.092439048 -0.511272374 -0.194317425  0.382993376  4.854322121
## [371] -0.975384978  0.156596983  2.703556395 -0.341475079 -0.352794899
## [376] -0.239596703 -0.126398507 -0.126398507 -0.794267864 -0.126398507
## [381] -0.149038146  1.752691548  0.167916803 -0.149038146 -0.465993095
## [386] -0.318835440 -0.103758868 -1.597975056 -0.058479589 -0.182997605
## [391]  0.496191572  1.514975336 -0.239596703 -0.318835440 -0.318835440
## [396] -0.069799409 -0.794267864 -0.522592193 -0.364114719 -0.432033636
## [401] -1.088583174 -0.862186782 -0.432033636 -0.998024617  0.598069948
## [406] -0.296195801 -0.296195801 -0.850866962  0.122637524  2.477160003
## [411] -0.896146240  0.213196081  0.869745619 -0.352794899  0.416952834
## [416] -0.918785880  0.213196081  1.107461831 -0.330155260 -0.035839950
## [421] -0.035839950 -0.692389487 -0.465993095 -0.352794899 -0.511272374
## [426] -0.228276883 -0.681069668 -0.092439048 -0.613150750 -0.918785880
## [431]  0.099997885 -0.409393997  2.986551886  0.020759148 -0.465993095
## [436] -1.597975056 -1.031984076 -1.597975056  0.518831211 -0.035839950
## [441]  0.518831211  0.439592474  0.077358246  0.122637524  0.382993376
## [446] -0.126398507 -0.352794899 -0.975384978  0.099997885  0.552790670
## [451] -0.182997605 -0.918785880  0.088678066 -0.579191291 -0.647110209
## [456] -0.511272374 -0.262236342  0.360353736 -0.058479589  0.213196081
## [461]  0.213196081 -0.149038146  0.133957344 -0.103758868 -0.488632734
## [466] -0.013200311 -0.805587683 -0.352794899 -0.488632734 -0.352794899
## [471]  0.235835721 -0.318835440 -0.465993095 -0.545231832  0.235835721
## [476]  0.099997885  1.005583454 -0.103758868 -0.624470570 -1.597975056
## [481]  0.145277164 -0.318835440 -0.149038146  1.050862732  1.186700568
## [486] -0.115078687 -0.692389487  4.061934749  0.213196081 -0.748988585
## [491] -0.511272374  0.507511391 -0.352794899  0.948984356 -0.250916523
## [496] -0.409393997 -0.386754358 -0.341475079 -0.522592193  0.767867242
## [501] -0.318835440 -0.465993095  0.756547423 -0.035839950 -0.194317425
## [506]  1.152741109  3.835538356 -0.511272374  0.530151030 -1.145182272
## [511] -0.465993095  1.231979846  0.077358246 -0.805587683  1.922488842
## [516]  0.099997885 -0.318835440 -0.205637244  0.496191572 -0.522592193
## [521] -1.088583174 -0.975384978 -0.465993095  3.835538356 -1.597975056
## [526]  2.929952788 -0.352794899  0.224515901 -0.533912013 -0.126398507
## [531]  0.847105980 -0.465993095 -0.352794899  0.620709587 -0.182997605
## [536] -0.171677785  2.069646497 -0.386754358 -0.850866962  0.179236623
## [541] -0.013200311 -0.477312915 -0.522592193 -0.149038146 -0.443353456
## [546]  0.156596983 -0.692389487  0.043398787 -0.432033636 -0.352794899
## [551] -1.597975056 -1.597975056  0.190556442  0.450912293 -0.103758868
## [556] -1.303659746 -0.647110209  1.345178042  0.235835721  0.462232113
## [561] -0.375434538  5.193916710  0.790506881 -0.103758868  1.322538403
## [566] -0.239596703 -0.805587683 -0.205637244 -1.496096680 -0.420713817
## [571]  0.507511391  0.269795179  1.514975336  0.518831211  0.099997885
## [576] -0.375434538 -0.058479589 -0.352794899 -0.386754358  0.224515901
## [581]  0.722587964 -0.579191291 -1.190461550 -0.907466060  0.054718607
## [586]  0.349033917 -0.420713817 -0.805587683 -0.533912013 -0.398074178
## [591] -0.964065158  0.722587964  1.707412270  0.009439328  1.345178042
## [596] -0.352794899  0.518831211 -0.601830931  0.439592474  0.575430309
## [601]  2.363961807  0.156596983 -0.149038146  0.349033917  4.016655470
## [606] -1.597975056  0.360353736  0.179236623 -0.375434538 -0.794267864
## [611] -1.145182272  1.265939305  0.518831211 -0.092439048

Python

  • Covariance
data_num[['ApplicantIncome','LoanAmount']].cov()
##                  ApplicantIncome     LoanAmount
## ApplicantIncome     3.732039e+07  290397.998060
## LoanAmount          2.903980e+05    7074.026707
data_num[['CoapplicantIncome','LoanAmount']].cov()
##                    CoapplicantIncome    LoanAmount
## CoapplicantIncome       8.562930e+06  46570.025909
## LoanAmount              4.657003e+04   7074.026707
  • Pearson's correlation
data_num[['ApplicantIncome','LoanAmount']].corr()
##                  ApplicantIncome  LoanAmount
## ApplicantIncome         1.000000    0.565181
## LoanAmount              0.565181    1.000000
data_num[['CoapplicantIncome','LoanAmount']].corr()
##                    CoapplicantIncome  LoanAmount
## CoapplicantIncome           1.000000    0.189218
## LoanAmount                  0.189218    1.000000
  • Z-Score
zscore = (data_num - data_num.mean())/data_num.std()

zscore
##      ApplicantIncome  CoapplicantIncome  ...  Loan_Amount_Term  Credit_History
## 0           0.072931          -0.554036  ...          0.279623        0.447466
## 1          -0.134302          -0.038700  ...          0.279623        0.447466
## 2          -0.393427          -0.554036  ...          0.279623        0.447466
## 3          -0.461686           0.251774  ...          0.279623        0.447466
## 4           0.097649          -0.554036  ...          0.279623        0.447466
## ..               ...                ...  ...               ...             ...
## 609        -0.409796          -0.554036  ...          0.279623        0.447466
## 610        -0.212383          -0.554036  ...         -2.516603        0.447466
## 611         0.436818          -0.472019  ...          0.279623        0.447466
## 612         0.356773          -0.554036  ...          0.279623        0.447466
## 613        -0.134302          -0.554036  ...          0.279623       -2.327725
## 
## [614 rows x 5 columns]
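scipy offers the same standardisation via ss.zscore; it defaults to the population standard deviation (ddof=0), so ddof=1 is set here to match the pandas .std() used above. A minimal sketch:

# Column-wise z-scores with the sample standard deviation
zs = data_num.apply(lambda s: ss.zscore(s, ddof=1))
print(zs.head())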

Multivariate numeric

R

cov(Quan.data)
##                   ApplicantIncome CoapplicantIncome    LoanAmount
## ApplicantIncome      3.732039e+07     -2.084490e+06 290502.424393
## CoapplicantIncome   -2.084490e+06      8.562930e+06  49213.843474
## LoanAmount           2.905024e+05      4.921384e+04   7804.066975
## Loan_Amount_Term    -7.854886e+03     -1.210165e+04    424.860360
## Credit_History       1.787413e+01     -7.206009e+01     -1.210292
##                   Loan_Amount_Term Credit_History
## ApplicantIncome       -7854.885728     17.8741279
## CoapplicantIncome    -12101.648543    -72.0600882
## LoanAmount              424.860360     -1.2102917
## Loan_Amount_Term       6754.190796      1.7260549
## Credit_History            1.726055      0.1754202
cor(Quan.data)
##                   ApplicantIncome CoapplicantIncome  LoanAmount
## ApplicantIncome       1.000000000       -0.11660458  0.53828984
## CoapplicantIncome    -0.116604581        1.00000000  0.19037749
## LoanAmount            0.538289836        0.19037749  1.00000000
## Loan_Amount_Term     -0.015645164       -0.05032070  0.05851929
## Credit_History        0.006985733       -0.05879546 -0.03271069
##                   Loan_Amount_Term Credit_History
## ApplicantIncome        -0.01564516    0.006985733
## CoapplicantIncome      -0.05032070   -0.058795464
## LoanAmount              0.05851929   -0.032710695
## Loan_Amount_Term        1.00000000    0.050145061
## Credit_History          0.05014506    1.000000000

Python

Covariance

data_num.cov()
##                    ApplicantIncome  ...  Credit_History
## ApplicantIncome       3.732039e+07  ...      -26.753449
## CoapplicantIncome    -2.084490e+06  ...       15.934204
## LoanAmount            2.903980e+05  ...        0.000802
## Loan_Amount_Term     -1.779154e+04  ...       -0.153344
## Credit_History       -2.675345e+01  ...        0.129842
## 
## [5 rows x 5 columns]

Correlation

data_num.corr()
##                    ApplicantIncome  ...  Credit_History
## ApplicantIncome           1.000000  ...       -0.012153
## CoapplicantIncome        -0.116605  ...        0.015112
## LoanAmount                0.565181  ...        0.000026
## Loan_Amount_Term         -0.045242  ...       -0.006611
## Credit_History           -0.012153  ...        1.000000
## 
## [5 rows x 5 columns]
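The full correlation matrix is easier to read as a heatmap; a minimal sketch:

# Annotated heatmap of the pairwise Pearson correlations
plt.figure(figsize=(6, 5))
sns.heatmap(data_num.corr(), annot=True, fmt=".2f", cmap="coolwarm")
plt.show()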

EDA the Lazy Way

R

basic_eda <- function(data)
{
  glimpse(data)        # column types and example values
  skim(data)           # per-column summary statistics
  df_status(data)      # zeros, NAs and unique counts per column
  freq(data)           # frequency tables for the categorical columns
  profiling_num(data)  # detailed profiling of the numeric columns
  plot_num(data)       # histograms of the numeric columns
  describe(data)       # Hmisc description of every column
}
basic_eda(data)
## Rows: 614
## Columns: 13
## $ Loan_ID           <chr> "LP001002", "LP001003", "LP001005", "LP001006", "LP0…
## $ Gender            <chr> "Male", "Male", "Male", "Male", "Male", "Male", "Mal…
## $ Married           <chr> "No", "Yes", "Yes", "Yes", "No", "Yes", "Yes", "Yes"…
## $ Dependents        <chr> "0", "1", "0", "0", "0", "2", "0", "3+", "2", "1", "…
## $ Education         <chr> "Graduate", "Graduate", "Graduate", "Not Graduate", …
## $ Self_Employed     <chr> "No", "No", "Yes", "No", "No", "Yes", "No", "No", "N…
## $ ApplicantIncome   <int> 5849, 4583, 3000, 2583, 6000, 5417, 2333, 3036, 4006…
## $ CoapplicantIncome <dbl> 0, 1508, 0, 2358, 0, 4196, 1516, 2504, 1526, 10968, …
## $ LoanAmount        <dbl> 0, 128, 66, 120, 141, 267, 95, 158, 168, 349, 70, 10…
## $ Loan_Amount_Term  <dbl> 360, 360, 360, 360, 360, 360, 360, 360, 360, 360, 36…
## $ Credit_History    <dbl> 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0…
## $ Property_Area     <chr> "Urban", "Rural", "Urban", "Urban", "Urban", "Urban"…
## $ Loan_Status       <chr> "Y", "N", "Y", "Y", "Y", "Y", "Y", "N", "Y", "N", "Y…
##             variable q_zeros p_zeros q_na p_na q_inf p_inf      type unique
## 1            Loan_ID       0    0.00    0    0     0     0 character    614
## 2             Gender       0    0.00    0    0     0     0 character      3
## 3            Married       0    0.00    0    0     0     0 character      3
## 4         Dependents     345   56.19    0    0     0     0 character      5
## 5          Education       0    0.00    0    0     0     0 character      2
## 6      Self_Employed       0    0.00    0    0     0     0 character      3
## 7    ApplicantIncome       0    0.00    0    0     0     0   integer    505
## 8  CoapplicantIncome     273   44.46    0    0     0     0   numeric    287
## 9         LoanAmount      22    3.58    0    0     0     0   numeric    204
## 10  Loan_Amount_Term      14    2.28    0    0     0     0   numeric     11
## 11    Credit_History     139   22.64    0    0     0     0   numeric      2
## 12     Property_Area       0    0.00    0    0     0     0 character      3
## 13       Loan_Status       0    0.00    0    0     0     0 character      2
##      Loan_ID frequency percentage cumulative_perc
## 1   LP001002         1       0.16            0.16
## 2   LP001003         1       0.16            0.32
## 3   LP001005         1       0.16            0.48
## 4   LP001006         1       0.16            0.64
## 5   LP001008         1       0.16            0.80
## 6   LP001011         1       0.16            0.96
## 7   LP001013         1       0.16            1.12
## 8   LP001014         1       0.16            1.28
## 9   LP001018         1       0.16            1.44
## 10  LP001020         1       0.16            1.60
## 11  LP001024         1       0.16            1.76
## 12  LP001027         1       0.16            1.92
## 13  LP001028         1       0.16            2.08
## 14  LP001029         1       0.16            2.24
## 15  LP001030         1       0.16            2.40
## 16  LP001032         1       0.16            2.56
## 17  LP001034         1       0.16            2.72
## 18  LP001036         1       0.16            2.88
## 19  LP001038         1       0.16            3.04
## 20  LP001041         1       0.16            3.20
## 21  LP001043         1       0.16            3.36
## 22  LP001046         1       0.16            3.52
## 23  LP001047         1       0.16            3.68
## 24  LP001050         1       0.16            3.84
## 25  LP001052         1       0.16            4.00
## 26  LP001066         1       0.16            4.16
## 27  LP001068         1       0.16            4.32
## 28  LP001073         1       0.16            4.48
## 29  LP001086         1       0.16            4.64
## 30  LP001087         1       0.16            4.80
## 31  LP001091         1       0.16            4.96
## 32  LP001095         1       0.16            5.12
## 33  LP001097         1       0.16            5.28
## 34  LP001098         1       0.16            5.44
## 35  LP001100         1       0.16            5.60
## 36  LP001106         1       0.16            5.76
## 37  LP001109         1       0.16            5.92
## 38  LP001112         1       0.16            6.08
## 39  LP001114         1       0.16            6.24
## 40  LP001116         1       0.16            6.40
## 41  LP001119         1       0.16            6.56
## 42  LP001120         1       0.16            6.72
## 43  LP001123         1       0.16            6.88
## 44  LP001131         1       0.16            7.04
## 45  LP001136         1       0.16            7.20
## 46  LP001137         1       0.16            7.36
## ..        ...       ...        ...             ...
## (rows 47-613 omitted: every Loan_ID occurs exactly once, i.e. 0.16% of the 614 observations)
## 614 LP002990         1       0.16          100.00

##   Gender frequency percentage cumulative_perc
## 1   Male       489      79.64           79.64
## 2 Female       112      18.24           97.88
## 3               13       2.12          100.00

##   Married frequency percentage cumulative_perc
## 1     Yes       398      64.82           64.82
## 2      No       213      34.69           99.51
## 3                 3       0.49          100.00

##   Dependents frequency percentage cumulative_perc
## 1          0       345      56.19           56.19
## 2          1       102      16.61           72.80
## 3          2       101      16.45           89.25
## 4         3+        51       8.31           97.56
## 5                   15       2.44          100.00

##      Education frequency percentage cumulative_perc
## 1     Graduate       480      78.18           78.18
## 2 Not Graduate       134      21.82          100.00

##   Self_Employed frequency percentage cumulative_perc
## 1            No       500      81.43           81.43
## 2           Yes        82      13.36           94.79
## 3                      32       5.21          100.00

##   Property_Area frequency percentage cumulative_perc
## 1     Semiurban       233      37.95           37.95
## 2         Urban       202      32.90           70.85
## 3         Rural       179      29.15          100.00

##   Loan_Status frequency percentage cumulative_perc
## 1           Y       422      68.73           68.73
## 2           N       192      31.27          100.00

## data 
## 
##  13  Variables      614  Observations
## --------------------------------------------------------------------------------
## Loan_ID 
##        n  missing distinct 
##      614        0      614 
## 
## lowest : LP001002 LP001003 LP001005 LP001006 LP001008
## highest: LP002978 LP002979 LP002983 LP002984 LP002990
## --------------------------------------------------------------------------------
## Gender 
##        n  missing distinct 
##      601       13        2 
##                         
## Value      Female   Male
## Frequency     112    489
## Proportion  0.186  0.814
## --------------------------------------------------------------------------------
## Married 
##        n  missing distinct 
##      611        3        2 
##                       
## Value         No   Yes
## Frequency    213   398
## Proportion 0.349 0.651
## --------------------------------------------------------------------------------
## Dependents 
##        n  missing distinct 
##      599       15        4 
##                                   
## Value          0     1     2    3+
## Frequency    345   102   101    51
## Proportion 0.576 0.170 0.169 0.085
## --------------------------------------------------------------------------------
## Education 
##        n  missing distinct 
##      614        0        2 
##                                     
## Value          Graduate Not Graduate
## Frequency           480          134
## Proportion        0.782        0.218
## --------------------------------------------------------------------------------
## Self_Employed 
##        n  missing distinct 
##      582       32        2 
##                       
## Value         No   Yes
## Frequency    500    82
## Proportion 0.859 0.141
## --------------------------------------------------------------------------------
## ApplicantIncome 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      614        0      505        1     5403     4183     1898     2216 
##      .25      .50      .75      .90      .95 
##     2878     3812     5795     9460    14583 
## 
## lowest :   150   210   416   645   674, highest: 39147 39999 51763 63337 81000
## --------------------------------------------------------------------------------
## CoapplicantIncome 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      614        0      287    0.912     1621     2118        0        0 
##      .25      .50      .75      .90      .95 
##        0     1188     2297     3782     4997 
## 
## lowest :     0.00    16.12   189.00   240.00   242.00
## highest: 10968.00 11300.00 20000.00 33837.00 41667.00
## --------------------------------------------------------------------------------
## LoanAmount 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      614        0      204        1    141.2    84.09     38.6     63.6 
##      .25      .50      .75      .90      .95 
##     98.0    125.0    164.8    229.4    293.4 
## 
## lowest :   0   9  17  25  26, highest: 500 570 600 650 700
## --------------------------------------------------------------------------------
## Loan_Amount_Term 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##      614        0       11     0.42    334.2    57.12      180      180 
##      .25      .50      .75      .90      .95 
##      360      360      360      360      360 
## 
## lowest :   0  12  36  60  84, highest: 180 240 300 360 480
##                                                                             
## Value          0    12    36    60    84   120   180   240   300   360   480
## Frequency     14     1     2     2     4     3    44     4    13   512    15
## Proportion 0.023 0.002 0.003 0.003 0.007 0.005 0.072 0.007 0.021 0.834 0.024
## --------------------------------------------------------------------------------
## Credit_History 
##        n  missing distinct     Info      Sum     Mean      Gmd 
##      614        0        2    0.525      475   0.7736   0.3508 
## 
## --------------------------------------------------------------------------------
## Property_Area 
##        n  missing distinct 
##      614        0        3 
##                                         
## Value          Rural Semiurban     Urban
## Frequency        179       233       202
## Proportion     0.292     0.379     0.329
## --------------------------------------------------------------------------------
## Loan_Status 
##        n  missing distinct 
##      614        0        2 
##                       
## Value          N     Y
## Frequency    192   422
## Proportion 0.313 0.687
## --------------------------------------------------------------------------------

Python

SweetViz = sv.analyze(dataphy)
## (console output trimmed: sweetviz emitted repeated pandas FutureWarnings about the deprecated
##  `iteritems` and `mad` methods, followed by per-feature progress bars)
## Done! Use 'show' commands to display/save.
SweetViz.show_html()
## Report SWEETVIZ_REPORT.html was generated! NOTEBOOK/COLAB USERS: the web browser MAY not pop up, regardless, the report IS saved in your notebook/colab files.

file:///C:/Users/Lenovo/Documents/Statistik/SWEETVIZ_REPORT.html

Task 4

Examine the density distribution of each quantitative variable using R and Python, covering the following parts:

Univariate numeric

R

ggplot(data, aes(x = ApplicantIncome))+
  geom_density()

ggplot(data, aes(x = CoapplicantIncome))+
  geom_density()

ggplot(data, aes(x = LoanAmount))+
  geom_density()

Python

sns.set_theme(style="darkgrid")
fig, axs = plt.subplots(2,2, figsize = (9,9))

sns.kdeplot(data_num.ApplicantIncome, shade= True, ax= axs[0,0], color='red')
sns.kdeplot(data_num.CoapplicantIncome, shade= True, ax= axs[0,1], color='blue')
sns.kdeplot(data_num.LoanAmount, shade= True, ax= axs[1,1], color='purple')
sns.kdeplot(data_num.Credit_History, shade= True, ax= axs[1,0], color='green')
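
The 2x2 grid above relies on a data_num object created earlier in the analysis. A minimal sketch of how it could be built, assuming data_num is simply the numeric columns of dataphy:

# assumption: data_num holds only the numeric columns
# (ApplicantIncome, CoapplicantIncome, LoanAmount, Loan_Amount_Term, Credit_History)
data_num = dataphy.select_dtypes(include="number")
data_num.columns.tolist()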

Bivariate numeric

R

bivariat <- ggplot(data, aes(x = ApplicantIncome, y = LoanAmount)) + 
  geom_point(alpha = .5) +
  geom_density_2d()
ggplotly(bivariat)
bivariat2 <- ggplot(data, aes(x = ApplicantIncome, y = CoapplicantIncome)) + 
  geom_point(alpha = .5) +
  geom_density_2d()
ggplotly(bivariat2)

Python

sns.kdeplot(
    data=data_num, x="ApplicantIncome", y="LoanAmount", fill=True, color = "r"
)

sns.kdeplot(
    data=data_num, x="CoapplicantIncome", y="LoanAmount", fill=True, color = "green"
)

Multivariate numeric

R

ggpairs(datanum)

Python

g = sns.jointplot(data=dataphy, x="ApplicantIncome", y="LoanAmount", hue="Property_Area", kind="kde")
g.plot_marginals(sns.rugplot, color="r", height=-.15, clip_on=False)
## <seaborn.axisgrid.JointGrid object at 0x00000150F335A410>
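
For a Python counterpart to the ggpairs matrix, seaborn's pairplot over the same numeric columns is a reasonable sketch; the column selection here is an assumption, not part of the original analysis:

# scatter/KDE matrix of the main numeric variables, coloured by loan status
cols = ["ApplicantIncome", "CoapplicantIncome", "LoanAmount", "Loan_Status"]
sns.pairplot(dataphy[cols].dropna(), hue="Loan_Status")
plt.show()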

Task 5

Carry out hypothesis testing using R and Python on the quantitative variables, covering the following parts:

Compute the margin of error and the interval estimate for the proportion of female borrowers at the 95% confidence level.
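
The quantities computed below follow the usual normal-approximation interval for a proportion: with \(\hat{p} = k/n\) the share of female borrowers in the sample,

\[
SE = \sqrt{\frac{\hat{p}(1-\hat{p})}{n}}, \qquad E = z_{0.975}\,SE, \qquad \hat{p} \pm E .
\]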

R

k = sum(data.clean$Gender == "Female")
n = sum(count(data.clean))
pbar = k/n 
SE = sqrt (pbar*(1-pbar)/n); SE
## [1] 0.01668873
E = qnorm(.975)*SE; E
## [1] 0.0327093
pbar + c(-E, E)
## [1] 0.1468748 0.2122934

Python

P_Female = P_Gender.Female
z_critical = ss.norm.ppf(q = 0.975)

margin_error = z_critical * math.sqrt(P_Female *(1 - P_Female)/len(dataphy))
interval = (P_Female - margin_error, P_Female + margin_error)

print("Margin of Error :")
## Margin of Error :
print(margin_error)
## 0.030546144664534237
print('Interval of Estimation :')
## Interval of Estimation :
print(interval)
## (0.1518642787882345, 0.21295656811730296)
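
P_Gender here is presumably the table of Gender proportions computed earlier in the document; a minimal sketch under that assumption:

# assumption: proportions over the imputed dataphy (614 rows, Gender already filled with the mode)
P_Gender = dataphy['Gender'].value_counts(normalize=True)
P_Gender

Note that the R interval above is computed on data.clean (complete cases only), while the Python interval uses all 614 rows of the imputed dataphy, which is why the two estimates differ slightly.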

If you plan to use an estimated proportion of 50% female customers, find the sample size needed to achieve a 5% margin of error for the observed data at the 95% confidence level.
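
The required sample size follows from inverting the margin-of-error formula. With \(p = 0.5\) and \(E = 0.05\),

\[
n = \frac{z_{0.975}^{2}\, p(1-p)}{E^{2}} = \frac{1.96^{2} \times 0.5 \times 0.5}{0.05^{2}} \approx 384.1 ,
\]

which in practice is rounded up to 385; the Python code below truncates with int(), so it reports 384.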

R

zstar = qnorm(.975)
p = 0.5
E = 0.05
zstar^2*p*(1-p)/E^2
## [1] 384.1459

Python

z_critical = ss.norm.ppf(q = 0.975)
p = 0.5
E = 0.05
sample_needed = int(z_critical**2*p*(1-p)/E**2)

print('ukuran sample yang dibutuhkan')
## ukuran sample yang dibutuhkan
print(sample_needed)
## 384

Test the validity of the claim at the 0.05 significance level, given that the Bank claims the average customer loan is:
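
All three versions of the claim are assessed with the one-sample t statistic

\[
t = \frac{\bar{x} - \mu_0}{s/\sqrt{n}}, \qquad \mu_0 = 150,
\]

with \(n-1\) degrees of freedom: reject \(H_0\) when \(t > t_{\alpha}\) for the greater-than claim, \(t < -t_{\alpha}\) for the less-than claim, or \(|t| > t_{\alpha/2}\) for the equal-to claim.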

R

set.seed(100)
Data1 <- sample_n(data.clean,30)
Data1

Greater than $150.

mu0 = 150
xbar = mean(Data1$LoanAmount)
s = sd(Data1$LoanAmount)
n = sum(count(Data1))
t = (xbar-mu0)/(s/sqrt(n));t
## [1] 0.5127632
alpha = .05
t.alpha = qt(1-alpha, df=n-1)
t.alpha
## [1] 1.699127

Less than $150.

mu0 = 150
xbar = mean(Data1$LoanAmount)
s = sd(Data1$LoanAmount)
n = sum(count(Data1))
t = (xbar-mu0)/(s/sqrt(n));t
## [1] 0.5127632
alpha = .05
t.alpha = qt(1-alpha, df=n-1)
-t.alpha
## [1] -1.699127

Equal to $150.

mu0 = 150
xbar = mean(Data1$LoanAmount)
s = sd(Data1$LoanAmount)
n = sum(count(Data1))
t = (xbar-mu0)/(s/sqrt(n));t
## [1] 0.5127632
alpha = .05
t.alpha = qt(1-alpha, df=n-1)
t.alpha
## [1] 1.699127
-t.alpha
## [1] -1.699127

Python

dataphy
##       Loan_ID  Gender Married  ... Credit_History Property_Area Loan_Status
## 0    LP001002    Male      No  ...            1.0         Urban           Y
## 1    LP001003    Male     Yes  ...            1.0         Rural           N
## 2    LP001005    Male     Yes  ...            1.0         Urban           Y
## 3    LP001006    Male     Yes  ...            1.0         Urban           Y
## 4    LP001008    Male      No  ...            1.0         Urban           Y
## ..        ...     ...     ...  ...            ...           ...         ...
## 609  LP002978  Female      No  ...            1.0         Rural           Y
## 610  LP002979    Male     Yes  ...            1.0         Rural           Y
## 611  LP002983    Male     Yes  ...            1.0         Urban           Y
## 612  LP002984    Male     Yes  ...            1.0         Urban           Y
## 613  LP002990  Female      No  ...            0.0     Semiurban           N
## 
## [614 rows x 13 columns]
mu0 = 150
xbar = dataphy['LoanAmount'].mean()
s = dataphy['LoanAmount'].std()
n = len(dataphy)
t = (xbar-mu0)/(s/(n**(1/2)))

print("t hitung")
## t hitung
print(t)
## -1.251382432758356

Greater than $150.

alpha = 0.05
t_alpha = ss.t.isf(alpha, n - 1)

print("t hitung")
## t hitung
print(t)
## -1.251382432758356
print("t table 0.5 one tail positive")
## t table 0.5 one tail positive
print(t_alpha)
## 1.64734316785344

Less than $150.

alpha = 0.05
t_alpha = ss.t.isf(alpha, n - 1)  

print("t hitung")
## t hitung
print(t)
## -1.251382432758356
print("t table 0.5 one tail negative")
## t table 0.5 one tail negative
print(-t_alpha)
## -1.64734316785344

Equal to $150.

alpha = 0.05
t_alpha = ss.t.isf(alpha/2, n-1)  

print("t hitung")
## t hitung
print(t)
## -1.251382432758356
print("t table 0.5 two tail")
## t table 0.5 two tail
print(-t_alpha, t_alpha)
## -1.963841443661033 1.963841443661033
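
As a cross-check on the manual calculation, scipy's built-in one-sample t-test should give essentially the same statistic together with a p-value (differences can arise only from how missing LoanAmount values were handled); a minimal sketch:

# two-sided test of H0: mean LoanAmount = 150; use alternative='greater' or 'less' for the one-sided claims
t_stat, p_val = ss.ttest_1samp(dataphy['LoanAmount'], popmean=150, nan_policy='omit')
print(t_stat, p_val)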

As above, test the validity of the claim at the 0.05 significance level, given that the standard deviation of the loans is known to be $85.
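
When the standard deviation is treated as known, the textbook statistic is the z statistic

\[
z = \frac{\bar{x} - \mu_0}{\sigma/\sqrt{n}}, \qquad \sigma = 85,
\]

with normal critical values of about 1.645 (one-sided) and 1.96 (two-sided) at \(\alpha = 0.05\). The code below keeps the t critical values used earlier; with the sample sizes involved, both sets of cut-offs lead to the same decisions here.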

R

mu0 = 150
xbar = mean(Data1$LoanAmount)
s = 85
n = sum(count(Data1))
t = (xbar-mu0)/(s/sqrt(n));t
## [1] 0.4940243
alpha = .05
t.alpha = qt(1-alpha, df=n-1)
t.alpha
## [1] 1.699127

Python

mu0 = 150
xbar = dataphy['LoanAmount'].mean()
s_2 = 85
n = len(dataphy)
t_2 = (xbar-mu0)/(s_2/(n**(1/2)))

print("t hitung")
## t hitung
print(t_2)
## -1.2382389920020291

Greater than $150.

alpha = 0.05
t_alpha = ss.t.isf(alpha, n - 1)  # equivalently: t_alpha = ss.t.ppf(1 - alpha, n - 1)

print("t hitung")
## t hitung
print(t_2)
## -1.2382389920020291
print("t table 0.5 one tail positive")
## t table 0.5 one tail positive
print(t_alpha)
## 1.64734316785344

Less than $150.

alpha = 0.05
t_alpha = ss.t.isf(alpha, n - 1)  # equivalently: t_alpha = ss.t.ppf(1 - alpha, n - 1)

print("t hitung")
## t hitung
print(t_2)
## -1.2382389920020291
print("t table 0.5 one tail negative")
## t table 0.5 one tail negative
print(-t_alpha)
## -1.64734316785344

Equal to $150.

alpha = 0.05
t_alpha = ss.t.isf(alpha/2, n - 1)  # equivalently: t_alpha = ss.t.ppf(1 - alpha/2, n - 1)

print("t hitung")
## t hitung
print(t_2)
## -1.2382389920020291
print("t table 0.5 two tail")
## t table 0.5 two tail
print(-t_alpha, t_alpha)
## -1.963841443661033 1.963841443661033
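
Putting the numbers together: in every formulation the computed statistic falls inside the non-rejection region,

\[
|t| \le 1.26 < 1.647 < 1.964 \quad (n = 614), \qquad |t| \le 0.52 < 1.699 \quad (n = 30),
\]

so at \(\alpha = 0.05\) none of the tests rejects \(H_0\): the data do not contradict the claim of an average loan of $150.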