METODE STATISTIKA
~ Ujian Tengah Semester ~
| NIM | 20205520004 |
| Prodi | Teknik Informatika |
| sabrina.amelia@student.matanauniversity.ac.id | |
| RPubs | https://rpubs.com/sabrinayose/ |
| Github | https://github.com/sabrinayose/ |
Tugas 1
Lakukan proses persiapan data dengan R dan Python, dengan beberapa langkah berikut:
1.1 Import Data
library(zoo)
df <- read.csv("loan-train.csv")
df
1.2 Penanganan Data Hilang
colSums(is.na(df)) ## Loan_ID Gender Married Dependents
## 0 0 0 0
## Education Self_Employed ApplicantIncome CoapplicantIncome
## 0 0 0 0
## LoanAmount Loan_Amount_Term Credit_History Property_Area
## 22 14 50 0
## Loan_Status
## 0
1.2.1 Dengan Cara Menghapus
df_rm <- na.omit(df)
print(colSums(is.na(df_rm))) ## Loan_ID Gender Married Dependents
## 0 0 0 0
## Education Self_Employed ApplicantIncome CoapplicantIncome
## 0 0 0 0
## LoanAmount Loan_Amount_Term Credit_History Property_Area
## 0 0 0 0
## Loan_Status
## 0
1.2.2 Input Mean/Modus/Median
Mean
df <- read.csv("loan-train.csv")
df$LoanAmount [is.na(df$LoanAmount )] <- mean(df$LoanAmount, na.rm = TRUE)
colSums(is.na(df))## Loan_ID Gender Married Dependents
## 0 0 0 0
## Education Self_Employed ApplicantIncome CoapplicantIncome
## 0 0 0 0
## LoanAmount Loan_Amount_Term Credit_History Property_Area
## 0 14 50 0
## Loan_Status
## 0
Mode
df <- read.csv("loan-train.csv")
# Impute missing loan terms with the statistical mode (most frequent value).
# base::mode() is NOT the statistical mode: it reports the storage mode of an
# object (the string "numeric" for this column), so assigning its result would
# fill the gaps with a string and coerce the whole column to character.
# table() drops NA by default, so the most frequent observed term is selected.
term_freq <- table(df$Loan_Amount_Term)
term_mode <- as.numeric(names(term_freq)[which.max(term_freq)])
df$Loan_Amount_Term[is.na(df$Loan_Amount_Term)] <- term_mode
colSums(is.na(df))## Loan_ID Gender Married Dependents
## 0 0 0 0
## Education Self_Employed ApplicantIncome CoapplicantIncome
## 0 0 0 0
## LoanAmount Loan_Amount_Term Credit_History Property_Area
## 22 0 50 0
## Loan_Status
## 0
1.2.3 Interpolasi Linier
df <- read.csv("loan-train.csv")
df$Credit_History<-na.approx(df$Credit_History)
colSums(is.na(df))## Loan_ID Gender Married Dependents
## 0 0 0 0
## Education Self_Employed ApplicantIncome CoapplicantIncome
## 0 0 0 0
## LoanAmount Loan_Amount_Term Credit_History Property_Area
## 22 14 0 0
## Loan_Status
## 0
1.2.4 Forward Filling
require(tidyr)
require(dplyr)
df <- read.csv("loan-train.csv")
df <- df %>% fill(Loan_Amount_Term)
colSums(is.na(df))## Loan_ID Gender Married Dependents
## 0 0 0 0
## Education Self_Employed ApplicantIncome CoapplicantIncome
## 0 0 0 0
## LoanAmount Loan_Amount_Term Credit_History Property_Area
## 22 0 50 0
## Loan_Status
## 0
1.2.5 Backward Filling
require(tidyr)
require(dplyr)
df <- read.csv("loan-train.csv")
df <- df %>% fill(LoanAmount, .direction="up")
colSums(is.na(df))## Loan_ID Gender Married Dependents
## 0 0 0 0
## Education Self_Employed ApplicantIncome CoapplicantIncome
## 0 0 0 0
## LoanAmount Loan_Amount_Term Credit_History Property_Area
## 0 14 50 0
## Loan_Status
## 0
1.3 Periksa Data Duplikat
df <- read.csv("loan-train.csv")
df %>% count(df$Self_Employed) %>% filter(n>1) %>% select(-n)
1.4 Pemisahan Data Kategori dan Numerik
Filter(is.numeric, df)
Filter(is.character, df)
1.5 Penanganan Data Numerik
1.5.1 Standardisasi
df <- read.csv("loan-train.csv")
df_rm <- na.omit(df_rm)
df_rm$LoanAmount_scaled <- scale(df_rm$LoanAmount)
df_rm$Loan_Amount_Term_scaled <- scale(df_rm$Loan_Amount_Term)
df_rm
1.5.2 Normalisasi
df <- read.csv("loan-train.csv")
# Min-max normalisation: linearly rescale a numeric vector onto [0, 1].
#
# Arguments:
#   x      Numeric vector to rescale.
#   na.rm  If TRUE, NA values are ignored when computing the range (they stay
#          NA in the result). Defaults to FALSE, in which case any NA in `x`
#          makes the whole result NA.
#
# Returns a numeric vector of the same length as `x`, where the minimum maps
# to 0 and the maximum maps to 1. A constant vector yields NaN (0 / 0).
normalize <- function(x, na.rm = FALSE) {
  lo <- min(x, na.rm = na.rm)
  hi <- max(x, na.rm = na.rm)
  # Numerator and denominator must each be parenthesised: without the
  # parentheses, `/` binds tighter than `-` and the expression evaluates as
  # x - (min / max) - min instead of (x - min) / (max - min).
  (x - lo) / (hi - lo)
}
df_rm <- na.omit(df_rm)
df_rm$LoanAmount_norm <- normalize(df_rm$LoanAmount)
df_rm$Loan_Amount_Term_norm <- normalize(df_rm$Loan_Amount_Term)
df_rm
1.5.3 Penskalaan Robust
df <- read.csv("loan-train.csv")
# Robust scaling: centre a numeric vector on its first quartile and divide by
# the interquartile range, so extreme values have little influence on the
# scale.
#
# Arguments:
#   x      Numeric vector to rescale.
#   na.rm  Passed to quantile(); if FALSE (the default) quantile() raises an
#          error when `x` contains NA.
#
# Returns a numeric vector of the same length as `x`, computed as
# (x - Q1) / (Q3 - Q1). If Q1 equals Q3 the result contains Inf/NaN.
# NOTE(review): the original centres on Q1 (quantile(x)[2]); many robust
# scalers centre on the median instead - confirm which is intended.
robust <- function(x, na.rm = FALSE) {
  # Compute both quartiles in a single call; names = FALSE keeps the result
  # free of the "25%" / "75%" labels.
  q <- quantile(x, probs = c(0.25, 0.75), na.rm = na.rm, names = FALSE)
  # The numerator must be parenthesised: previously the division applied to Q1
  # only, giving x - Q1 / (Q3 - Q1).
  (x - q[1]) / (q[2] - q[1])
}
df_rm <- na.omit(df_rm)
df_rm$LoanAmount_robust <- robust(df_rm$LoanAmount)
df_rm$Loan_Amount_Term_robust <- robust(df_rm$Loan_Amount_Term)
df_rm
1.6 Penanganan Data Pencilan
1.6.1 Metode Statistik
Distribusi Gaussian
df <- read.csv("loan-train.csv")
# Flag values lying more than `n_sd` sample standard deviations away from the
# sample mean.
#
# Arguments:
#   x     Numeric vector to test.
#   n_sd  Width of the accepted band in standard deviations. Defaults to 1,
#         the cut-off that was previously hard-coded.
#
# Returns a logical vector the same length as `x`: TRUE where the value is
# below mean - n_sd * sd or above mean + n_sd * sd. NA inputs (or an NA
# mean/sd) propagate as NA. An empty input yields logical(0).
outliers <- function(x, n_sd = 1) {
  centre <- mean(x)
  cut_off <- sd(x) * n_sd
  # Vectorised comparison with elementwise `|`: always returns a logical
  # vector, whereas sapply() returned list() for zero-length input.
  x < centre - cut_off | x > centre + cut_off
}
df_rm <- na.omit(df_rm)
df_rm[outliers(df_rm$LoanAmount),]
Boxplot atau Rentang Interkuartil (IQR)
df <- read.csv("loan-train.csv")
boxplot(df$Loan_Amount_Term)
1.7 Penanganan Data Kategorikal
df <- read.csv("loan-train.csv")
dim(df)## [1] 614 13
df <- read.csv("loan-train.csv")
head(df, 5)
df <- read.csv("loan-train.csv")
df_char <- Filter(is.character, df)
colSums(is.na(df_char)) ## Loan_ID Gender Married Dependents Education
## 0 0 0 0 0
## Self_Employed Property_Area Loan_Status
## 0 0 0
1.7.1 Pelabelan
df <- read.csv("loan-train.csv")
library(superml)
df_label <- LabelEncoder$new()
df$LoanAmount <- df_label$fit_transform(df$LoanAmount)
df$Married <- df_label$fit_transform(df$Married)
df$Education <- df_label$fit_transform(df$Education)
df1.7.2 Pemetaan Kustom
df <- read.csv("loan-train.csv")
df$Loan_Status[df$Loan_Status=="Y"] <- 1
df$Loan_Status[df$Loan_Status=="N"] <- 0
df1.7.3 Variabel Dummy
library(fastDummies)
df <- read.csv("loan-train.csv")
df <- dummy_cols(df)
df1.7.4 K-fold/Cross-fold
library(tidyverse)
library(caret)
library(ISLR)
library(lattice)
df <- read.csv("loan-train.csv")
df$Loan_Status[df$Loan_Status=="Y"] <- 1
df$Loan_Status[df$Loan_Status=="N"] <- 0
df1 <- na.omit(df)
set.seed(100)
dataset <- trainControl(method = "cv", number = 10, savePredictions = TRUE)
nb_fit <- train(Credit_History~., data = df1, method = "lm", trControl = dataset, tuneLength =14)
nb_fit## Linear Regression
##
## 529 samples
## 12 predictor
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 476, 476, 476, 476, 476, 477, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 0.4396691 NaN 0.2230044
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
Tugas 2
Lakukan Proses Visualisasi Data dengan menggunakan R dan Python dengan beberapa langkah berikut:
2.1 Visualisasi Univariabel
2.1.1 Categorical
Bar Chart
library(ggplot2)
df <- read.csv("loan-train.csv")
ggplot(df, aes(x = Property_Area)) +
geom_bar(fill = "cornflowerblue",
color = "azure4") +
theme_minimal() +
labs(x = "Property Area",
y = "Frequency",
title = "Loan Train by Property Area") Pie Chart
library(dplyr)
library(ggplot2)
library(scales)
# Data preparation
plotdata <- df %>%
count(Property_Area) %>%
arrange(desc(Property_Area)) %>%
mutate(prop = round(n*100/sum(n), 1),
lab.ypos = cumsum(prop) - 0.5*prop)
# Create Pie chart
mycols <- c("#0073C2FF", "#EFC000FF", "#868686FF", "#CD534CFF")
ggplot(plotdata, aes(x = "", y = prop, fill = Property_Area)) +
geom_bar(width = 1, stat = "identity", color = "white") +
coord_polar("y", start = 0)+
geom_text(aes(y = lab.ypos, label = prop), color = "white")+
scale_fill_manual(values = mycols) +
theme_void()+
labs(title = "Loan Train by Property Area")Tree Map
library(ggplot2)
library(treemapify)
library(scales)
plotdata <- df %>%
count(Property_Area)
ggplot(plotdata,
aes(fill = Property_Area,
area = n)) +
geom_treemap() +
labs(title = "Loan Train by Property Area")2.1.2 Continuous
Histogram
library(ggplot2)
ggplot(df, aes(x = LoanAmount)) +
geom_histogram(fill = "#C04343", color = "white", bins = 20) +
theme_minimal() +
labs(title="Loan Train by Property Area", x = "Loan Amount")Kernel Density Plot
library(ggplot2)
ggplot(df, aes(x = Property_Area)) +
geom_density(fill = "indianred3") +
theme_minimal() +
labs(title = "Loan Train by Property Area")Dot Chart
library(ggplot2)
ggplot(df, aes(x = Dependents)) +
geom_dotplot(fill = "gold",
color = "azure4") +
theme_minimal() +
labs(title = "Loan Train by Loan_Status",
y = "Frequency",
x = "Loan Status")2.2 Visualisasi Bivariabel
2.2.1 Categorical vs. Categorical
Grouped Bar Chart
library(ggplot2) # for visualization
ggplot(df, aes(x = Education, fill = Property_Area)) +
theme_minimal() + # use a minimal theme
geom_bar(position = position_dodge(preserve = "single"))2.2.2 Continuous vs. Continuous
Scatterplot Fit Lines
library(ggplot2)
ggplot(df,
aes(x = ApplicantIncome,
y = LoanAmount)) +
geom_point(color= "cornflowerblue") +
geom_smooth(method = "lm", color = "brown1")+
theme_minimal() + # use a minimal theme
labs(x = "Applicant Income",
y = "",
title = "Applicant Income vs. Loan Amount")
2.2.3 Categorical vs. Continuous
Grouped Kernel Density Plots
ggplot(df,
aes(x = ApplicantIncome,
fill = Education)) +
geom_density(alpha = 0.4) +
theme_minimal() +
labs(title = "Applicant Income distribution by Education")2.3 Visualisasi Multivariabel
2.3.1 Grouping
library(carData) # for dataset
library(ggplot2) # for visualization
data(df, package="carData")
ggplot(df, aes(x = ApplicantIncome,
y = LoanAmount,
color=Education)) +
geom_point() +
theme_minimal() +
labs(title = "Loan Amount by Applicant Income and Education")2.3.2 Faceting
library(carData)
library(ggplot2)
ggplot(df, aes(x = LoanAmount)) +
geom_histogram(fill = "cornflowerblue",
color = "white") +
facet_wrap(~Education, ncol = 1) +
theme_minimal() +
labs(title = "Loan Amount histograms by Education")Tugas 3
Lakukan proses analisa data secara deskriptif menggunakan R dan Python dengan beberapa langkah berikut:
3.1 Kualitatif
3.1.1 Kategori Univariat
df = read.csv("loan-train.csv")
apply(is.na(df),2, which) ## $Loan_ID
## integer(0)
##
## $Gender
## integer(0)
##
## $Married
## integer(0)
##
## $Dependents
## integer(0)
##
## $Education
## integer(0)
##
## $Self_Employed
## integer(0)
##
## $ApplicantIncome
## integer(0)
##
## $CoapplicantIncome
## integer(0)
##
## $LoanAmount
## [1] 1 36 64 82 96 103 104 114 128 203 285 306 323 339 388 436 438 480 525
## [20] 551 552 606
##
## $Loan_Amount_Term
## [1] 20 37 45 46 74 113 166 198 224 233 336 368 422 424
##
## $Credit_History
## [1] 17 25 31 43 80 84 87 96 118 126 130 131 157 182 188 199 220 237 238
## [20] 260 261 280 310 314 318 319 324 349 364 378 393 396 412 445 450 452 461 474
## [39] 491 492 498 504 507 531 534 545 557 566 584 601
##
## $Property_Area
## integer(0)
##
## $Loan_Status
## integer(0)
df<-na.omit(df)
head(df,3) Cat1 <- table(df$Gender) # count the frequencies
Cat1 ##
## Female Male
## 12 95 422
prop.table(table(df$Gender)) ##
## Female Male
## 0.02268431 0.17958412 0.79773157
3.1.2 Kategori Bivariat
library(readr)
library(dplyr)
library(magrittr)
Cat2<- df %>%
select(Gender, Education) %>%
table()
Cat2 ## Education
## Gender Graduate Not Graduate
## 11 1
## Female 80 15
## Male 330 92
3.1.3 Kategori Multivariat
Cat3 <- df %>%
select(Gender, Education, Married) %>%
ftable()
Cat3 ## Married No Yes
## Gender Education
## Graduate 0 3 8
## Not Graduate 0 0 1
## Female Graduate 0 57 23
## Not Graduate 0 11 4
## Male Graduate 2 91 237
## Not Graduate 0 26 66
3.2 Kuantitatif
3.2.1 Univariat numerik
3.2.1.1 Measures of Central Tendency
Quan <- df %>%
select_if(is.numeric)
names(Quan) ## [1] "ApplicantIncome" "CoapplicantIncome" "LoanAmount"
## [4] "Loan_Amount_Term" "Credit_History"
Mean
mean(Quan$LoanAmount) ## [1] 145.8526
Quantile
quantile(Quan$LoanAmount) ## 0% 25% 50% 75% 100%
## 9 100 128 167 700
Median
median(Quan$LoanAmount) ## [1] 128
Mode
mode(Quan$LoanAmount) ## [1] "numeric"
Summary
summary(Quan) ## ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term
## Min. : 150 Min. : 0 Min. : 9.0 Min. : 36.0
## 1st Qu.: 2900 1st Qu.: 0 1st Qu.:100.0 1st Qu.:360.0
## Median : 3816 Median : 1086 Median :128.0 Median :360.0
## Mean : 5508 Mean : 1542 Mean :145.9 Mean :342.4
## 3rd Qu.: 5815 3rd Qu.: 2232 3rd Qu.:167.0 3rd Qu.:360.0
## Max. :81000 Max. :33837 Max. :700.0 Max. :480.0
## Credit_History
## Min. :0.0000
## 1st Qu.:1.0000
## Median :1.0000
## Mean :0.8507
## 3rd Qu.:1.0000
## Max. :1.0000
3.2.1.2 Scale
Variance
var(Quan$LoanAmount) ## [1] 7074.224
Standard Deviation
sd(Quan$LoanAmount) ## [1] 84.10841
Median Absolute Deviation
mad(Quan$LoanAmount) ## [1] 45.9606
Inter Quantile Range
IQR(Quan$LoanAmount) ## [1] 67
3.2.1.3 Skewness
library(e1071)
skewness(Quan$LoanAmount)## [1] 2.593174
3.2.1.4 Kurtosis
kurtosis(Quan$LoanAmount)## [1] 9.936842
3.2.2 Bivariat numerik
Covariance
cov(Quan$LoanAmount,Quan$Loan_Amount_Term)## [1] 126.7792
Pearson’s Correlation Coefficient
cor(Quan$LoanAmount,Quan$Loan_Amount_Term)## [1] 0.02323917
Z-Score
zscore=(Quan$LoanAmount-mean(Quan$LoanAmount))/sd(Quan$LoanAmount)
3.2.3 Multivariat numerik
Sample Covariance Matrix
cov(Quan) ## ApplicantIncome CoapplicantIncome LoanAmount
## ApplicantIncome 4.101291e+07 -1.982437e+06 3.074072e+05
## CoapplicantIncome -1.982437e+06 6.372069e+06 3.379027e+04
## LoanAmount 3.074072e+05 3.379027e+04 7.074224e+03
## Loan_Amount_Term -2.611139e+04 -4.740321e+01 1.267792e+02
## Credit_History -5.432772e+01 -9.768561e+00 -5.447886e-01
## Loan_Amount_Term Credit_History
## ApplicantIncome -2.611139e+04 -54.3277231
## CoapplicantIncome -4.740321e+01 -9.7685610
## LoanAmount 1.267792e+02 -0.5447886
## Loan_Amount_Term 4.207035e+03 0.2003351
## Credit_History 2.003351e-01 0.1272770
Sample Correlation Matrix
cor(Quan) ## ApplicantIncome CoapplicantIncome LoanAmount
## ApplicantIncome 1.00000000 -0.1226305807 0.57070849
## CoapplicantIncome -0.12263058 1.0000000000 0.15915197
## LoanAmount 0.57070849 0.1591519703 1.00000000
## Loan_Amount_Term -0.06286105 -0.0002895206 0.02323917
## Credit_History -0.02377860 -0.0108471425 -0.01815573
## Loan_Amount_Term Credit_History
## ApplicantIncome -0.0628610527 -0.02377860
## CoapplicantIncome -0.0002895206 -0.01084714
## LoanAmount 0.0232391675 -0.01815573
## Loan_Amount_Term 1.0000000000 0.00865753
## Credit_History 0.0086575296 1.00000000
3.3 EDA dengan cara Malas
library(funModeling)
library(tidyverse)
library(Hmisc)
library(skimr)
# One-call exploratory summary of a data frame.
#
# Runs a sequence of overview helpers on `data`; the helpers come from the
# packages attached just above this definition (funModeling, tidyverse,
# Hmisc, skimr).
#
# Arguments:
#   data  The data frame to profile.
#
# Returns the result of describe(data), which auto-prints when the function
# is called at top level.
basic_eda <- function(data) {
  glimpse(data)
  # A value produced in the middle of a function body is not auto-printed, so
  # the skim() summary was silently dropped; print it explicitly.
  print(skim(data))
  df_status(data)
  freq(data)
  # Same issue as skim(): the numeric profiling table never reached the
  # console without an explicit print().
  print(profiling_num(data))
  plot_num(data)
  describe(data)
}
basic_eda(df)## Rows: 529
## Columns: 13
## $ Loan_ID <chr> "LP001003", "LP001005", "LP001006", "LP001008", "LP0~
## $ Gender <chr> "Male", "Male", "Male", "Male", "Male", "Male", "Mal~
## $ Married <chr> "Yes", "Yes", "Yes", "No", "Yes", "Yes", "Yes", "Yes~
## $ Dependents <chr> "1", "0", "0", "0", "2", "0", "3+", "2", "1", "2", "~
## $ Education <chr> "Graduate", "Graduate", "Not Graduate", "Graduate", ~
## $ Self_Employed <chr> "No", "Yes", "No", "No", "Yes", "No", "No", "No", "N~
## $ ApplicantIncome <int> 4583, 3000, 2583, 6000, 5417, 2333, 3036, 4006, 1284~
## $ CoapplicantIncome <dbl> 1508, 0, 2358, 0, 4196, 1516, 2504, 1526, 10968, 700~
## $ LoanAmount <int> 128, 66, 120, 141, 267, 95, 158, 168, 349, 70, 109, ~
## $ Loan_Amount_Term <int> 360, 360, 360, 360, 360, 360, 360, 360, 360, 360, 36~
## $ Credit_History <int> 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0~
## $ Property_Area <chr> "Rural", "Urban", "Urban", "Urban", "Urban", "Urban"~
## $ Loan_Status <chr> "N", "Y", "Y", "Y", "Y", "Y", "N", "Y", "N", "Y", "Y~
## variable q_zeros p_zeros q_na p_na q_inf p_inf type unique
## 1 Loan_ID 0 0.00 0 0 0 0 character 529
## 2 Gender 0 0.00 0 0 0 0 character 3
## 3 Married 0 0.00 0 0 0 0 character 3
## 4 Dependents 295 55.77 0 0 0 0 character 5
## 5 Education 0 0.00 0 0 0 0 character 2
## 6 Self_Employed 0 0.00 0 0 0 0 character 3
## 7 ApplicantIncome 0 0.00 0 0 0 0 integer 442
## 8 CoapplicantIncome 238 44.99 0 0 0 0 numeric 245
## 9 LoanAmount 0 0.00 0 0 0 0 integer 194
## 10 Loan_Amount_Term 0 0.00 0 0 0 0 integer 9
## 11 Credit_History 79 14.93 0 0 0 0 integer 2
## 12 Property_Area 0 0.00 0 0 0 0 character 3
## 13 Loan_Status 0 0.00 0 0 0 0 character 2
## Loan_ID frequency percentage cumulative_perc
## 1 LP001003 1 0.19 0.19
## 2 LP001005 1 0.19 0.38
## 3 LP001006 1 0.19 0.57
## 4 LP001008 1 0.19 0.76
## 5 LP001011 1 0.19 0.95
## 6 LP001013 1 0.19 1.14
## 7 LP001014 1 0.19 1.33
## 8 LP001018 1 0.19 1.52
## 9 LP001020 1 0.19 1.71
## 10 LP001024 1 0.19 1.90
## 11 LP001027 1 0.19 2.09
## 12 LP001028 1 0.19 2.28
## 13 LP001029 1 0.19 2.47
## 14 LP001030 1 0.19 2.66
## 15 LP001032 1 0.19 2.85
## 16 LP001036 1 0.19 3.04
## 17 LP001038 1 0.19 3.23
## 18 LP001043 1 0.19 3.42
## 19 LP001046 1 0.19 3.61
## 20 LP001047 1 0.19 3.80
## 21 LP001050 1 0.19 3.99
## 22 LP001066 1 0.19 4.18
## 23 LP001068 1 0.19 4.37
## 24 LP001073 1 0.19 4.56
## 25 LP001086 1 0.19 4.75
## 26 LP001087 1 0.19 4.94
## 27 LP001095 1 0.19 5.13
## 28 LP001097 1 0.19 5.32
## 29 LP001098 1 0.19 5.51
## 30 LP001100 1 0.19 5.70
## 31 LP001112 1 0.19 5.89
## 32 LP001114 1 0.19 6.08
## 33 LP001116 1 0.19 6.27
## 34 LP001119 1 0.19 6.46
## 35 LP001120 1 0.19 6.65
## 36 LP001131 1 0.19 6.84
## 37 LP001138 1 0.19 7.03
## 38 LP001144 1 0.19 7.22
## 39 LP001146 1 0.19 7.41
## 40 LP001151 1 0.19 7.60
## 41 LP001155 1 0.19 7.79
## 42 LP001157 1 0.19 7.98
## 43 LP001164 1 0.19 8.17
## 44 LP001179 1 0.19 8.36
## 45 LP001186 1 0.19 8.55
## 46 LP001194 1 0.19 8.74
## 47 LP001195 1 0.19 8.93
## 48 LP001197 1 0.19 9.12
## 49 LP001198 1 0.19 9.31
## 50 LP001199 1 0.19 9.50
## 51 LP001205 1 0.19 9.69
## 52 LP001206 1 0.19 9.88
## 53 LP001207 1 0.19 10.07
## 54 LP001222 1 0.19 10.26
## 55 LP001225 1 0.19 10.45
## 56 LP001228 1 0.19 10.64
## 57 LP001233 1 0.19 10.83
## 58 LP001238 1 0.19 11.02
## 59 LP001241 1 0.19 11.21
## 60 LP001243 1 0.19 11.40
## 61 LP001245 1 0.19 11.59
## 62 LP001248 1 0.19 11.78
## 63 LP001253 1 0.19 11.97
## 64 LP001255 1 0.19 12.16
## 65 LP001256 1 0.19 12.35
## 66 LP001259 1 0.19 12.54
## 67 LP001263 1 0.19 12.73
## 68 LP001265 1 0.19 12.92
## 69 LP001267 1 0.19 13.11
## 70 LP001275 1 0.19 13.30
## 71 LP001279 1 0.19 13.49
## 72 LP001282 1 0.19 13.68
## 73 LP001289 1 0.19 13.87
## 74 LP001310 1 0.19 14.06
## 75 LP001316 1 0.19 14.25
## 76 LP001318 1 0.19 14.44
## 77 LP001319 1 0.19 14.63
## 78 LP001322 1 0.19 14.82
## 79 LP001325 1 0.19 15.01
## 80 LP001327 1 0.19 15.20
## 81 LP001333 1 0.19 15.39
## 82 LP001334 1 0.19 15.58
## 83 LP001343 1 0.19 15.77
## 84 LP001345 1 0.19 15.96
## 85 LP001349 1 0.19 16.15
## 86 LP001357 1 0.19 16.34
## 87 LP001367 1 0.19 16.53
## 88 LP001369 1 0.19 16.72
## 89 LP001370 1 0.19 16.91
## 90 LP001379 1 0.19 17.10
## 91 LP001384 1 0.19 17.29
## 92 LP001385 1 0.19 17.48
## 93 LP001387 1 0.19 17.67
## 94 LP001398 1 0.19 17.86
## 95 LP001401 1 0.19 18.05
## 96 LP001404 1 0.19 18.24
## 97 LP001421 1 0.19 18.43
## 98 LP001422 1 0.19 18.62
## 99 LP001426 1 0.19 18.81
## 100 LP001430 1 0.19 19.00
## 101 LP001431 1 0.19 19.19
## 102 LP001432 1 0.19 19.38
## 103 LP001439 1 0.19 19.57
## 104 LP001448 1 0.19 19.76
## 105 LP001451 1 0.19 19.95
## 106 LP001473 1 0.19 20.14
## 107 LP001478 1 0.19 20.33
## 108 LP001482 1 0.19 20.52
## 109 LP001487 1 0.19 20.71
## 110 LP001488 1 0.19 20.90
## 111 LP001489 1 0.19 21.09
## 112 LP001491 1 0.19 21.28
## 113 LP001492 1 0.19 21.47
## 114 LP001493 1 0.19 21.66
## 115 LP001497 1 0.19 21.85
## 116 LP001498 1 0.19 22.04
## 117 LP001504 1 0.19 22.23
## 118 LP001507 1 0.19 22.42
## 119 LP001508 1 0.19 22.61
## 120 LP001514 1 0.19 22.80
## 121 LP001516 1 0.19 22.99
## 122 LP001518 1 0.19 23.18
## 123 LP001519 1 0.19 23.37
## 124 LP001520 1 0.19 23.56
## 125 LP001528 1 0.19 23.75
## 126 LP001529 1 0.19 23.94
## 127 LP001531 1 0.19 24.13
## 128 LP001532 1 0.19 24.32
## 129 LP001535 1 0.19 24.51
## 130 LP001536 1 0.19 24.70
## 131 LP001543 1 0.19 24.89
## 132 LP001546 1 0.19 25.08
## 133 LP001552 1 0.19 25.27
## 134 LP001560 1 0.19 25.46
## 135 LP001562 1 0.19 25.65
## 136 LP001565 1 0.19 25.84
## 137 LP001570 1 0.19 26.03
## 138 LP001572 1 0.19 26.22
## 139 LP001577 1 0.19 26.41
## 140 LP001578 1 0.19 26.60
## 141 LP001579 1 0.19 26.79
## 142 LP001580 1 0.19 26.98
## 143 LP001581 1 0.19 27.17
## 144 LP001585 1 0.19 27.36
## 145 LP001586 1 0.19 27.55
## 146 LP001594 1 0.19 27.74
## 147 LP001603 1 0.19 27.93
## 148 LP001606 1 0.19 28.12
## 149 LP001608 1 0.19 28.31
## 150 LP001610 1 0.19 28.50
## 151 LP001616 1 0.19 28.69
## 152 LP001630 1 0.19 28.88
## 153 LP001633 1 0.19 29.07
## 154 LP001636 1 0.19 29.26
## 155 LP001637 1 0.19 29.45
## 156 LP001639 1 0.19 29.64
## 157 LP001640 1 0.19 29.83
## 158 LP001641 1 0.19 30.02
## 159 LP001644 1 0.19 30.21
## 160 LP001647 1 0.19 30.40
## 161 LP001653 1 0.19 30.59
## 162 LP001656 1 0.19 30.78
## 163 LP001657 1 0.19 30.97
## 164 LP001658 1 0.19 31.16
## 165 LP001664 1 0.19 31.35
## 166 LP001665 1 0.19 31.54
## 167 LP001666 1 0.19 31.73
## 168 LP001673 1 0.19 31.92
## 169 LP001674 1 0.19 32.11
## 170 LP001677 1 0.19 32.30
## 171 LP001688 1 0.19 32.49
## 172 LP001691 1 0.19 32.68
## 173 LP001692 1 0.19 32.87
## 174 LP001693 1 0.19 33.06
## 175 LP001698 1 0.19 33.25
## 176 LP001699 1 0.19 33.44
## 177 LP001702 1 0.19 33.63
## 178 LP001708 1 0.19 33.82
## 179 LP001711 1 0.19 34.01
## 180 LP001713 1 0.19 34.20
## 181 LP001715 1 0.19 34.39
## 182 LP001716 1 0.19 34.58
## 183 LP001720 1 0.19 34.77
## 184 LP001722 1 0.19 34.96
## 185 LP001726 1 0.19 35.15
## 186 LP001732 1 0.19 35.34
## 187 LP001736 1 0.19 35.53
## 188 LP001743 1 0.19 35.72
## 189 LP001744 1 0.19 35.91
## 190 LP001750 1 0.19 36.10
## 191 LP001751 1 0.19 36.29
## 192 LP001754 1 0.19 36.48
## 193 LP001758 1 0.19 36.67
## 194 LP001760 1 0.19 36.86
## 195 LP001761 1 0.19 37.05
## 196 LP001765 1 0.19 37.24
## 197 LP001768 1 0.19 37.43
## 198 LP001776 1 0.19 37.62
## 199 LP001778 1 0.19 37.81
## 200 LP001784 1 0.19 38.00
## 201 LP001790 1 0.19 38.19
## 202 LP001792 1 0.19 38.38
## 203 LP001798 1 0.19 38.57
## 204 LP001800 1 0.19 38.76
## 205 LP001806 1 0.19 38.95
## 206 LP001807 1 0.19 39.14
## 207 LP001811 1 0.19 39.33
## 208 LP001813 1 0.19 39.52
## 209 LP001814 1 0.19 39.71
## 210 LP001819 1 0.19 39.90
## 211 LP001824 1 0.19 40.09
## 212 LP001825 1 0.19 40.28
## 213 LP001835 1 0.19 40.47
## 214 LP001836 1 0.19 40.66
## 215 LP001841 1 0.19 40.85
## 216 LP001843 1 0.19 41.04
## 217 LP001844 1 0.19 41.23
## 218 LP001846 1 0.19 41.42
## 219 LP001849 1 0.19 41.61
## 220 LP001854 1 0.19 41.80
## 221 LP001859 1 0.19 41.99
## 222 LP001868 1 0.19 42.18
## 223 LP001870 1 0.19 42.37
## 224 LP001871 1 0.19 42.56
## 225 LP001872 1 0.19 42.75
## 226 LP001875 1 0.19 42.94
## 227 LP001877 1 0.19 43.13
## 228 LP001882 1 0.19 43.32
## 229 LP001883 1 0.19 43.51
## 230 LP001884 1 0.19 43.70
## 231 LP001888 1 0.19 43.89
## 232 LP001891 1 0.19 44.08
## 233 LP001892 1 0.19 44.27
## 234 LP001894 1 0.19 44.46
## 235 LP001896 1 0.19 44.65
## 236 LP001900 1 0.19 44.84
## 237 LP001903 1 0.19 45.03
## 238 LP001904 1 0.19 45.22
## 239 LP001907 1 0.19 45.41
## 240 LP001910 1 0.19 45.60
## 241 LP001914 1 0.19 45.79
## 242 LP001915 1 0.19 45.98
## 243 LP001917 1 0.19 46.17
## 244 LP001924 1 0.19 46.36
## 245 LP001925 1 0.19 46.55
## 246 LP001926 1 0.19 46.74
## 247 LP001931 1 0.19 46.93
## 248 LP001935 1 0.19 47.12
## 249 LP001936 1 0.19 47.31
## 250 LP001938 1 0.19 47.50
## 251 LP001940 1 0.19 47.69
## 252 LP001945 1 0.19 47.88
## 253 LP001947 1 0.19 48.07
## 254 LP001949 1 0.19 48.26
## 255 LP001953 1 0.19 48.45
## 256 LP001954 1 0.19 48.64
## 257 LP001955 1 0.19 48.83
## 258 LP001963 1 0.19 49.02
## 259 LP001964 1 0.19 49.21
## 260 LP001972 1 0.19 49.40
## 261 LP001974 1 0.19 49.59
## 262 LP001977 1 0.19 49.78
## 263 LP001978 1 0.19 49.97
## 264 LP001993 1 0.19 50.16
## 265 LP001994 1 0.19 50.35
## 266 LP001996 1 0.19 50.54
## 267 LP002002 1 0.19 50.73
## 268 LP002004 1 0.19 50.92
## 269 LP002006 1 0.19 51.11
## 270 LP002024 1 0.19 51.30
## 271 LP002031 1 0.19 51.49
## 272 LP002035 1 0.19 51.68
## 273 LP002050 1 0.19 51.87
## 274 LP002051 1 0.19 52.06
## 275 LP002053 1 0.19 52.25
## 276 LP002065 1 0.19 52.44
## 277 LP002067 1 0.19 52.63
## 278 LP002068 1 0.19 52.82
## 279 LP002082 1 0.19 53.01
## 280 LP002086 1 0.19 53.20
## 281 LP002087 1 0.19 53.39
## 282 LP002097 1 0.19 53.58
## 283 LP002098 1 0.19 53.77
## 284 LP002100 1 0.19 53.96
## 285 LP002101 1 0.19 54.15
## 286 LP002103 1 0.19 54.34
## 287 LP002110 1 0.19 54.53
## 288 LP002112 1 0.19 54.72
## 289 LP002114 1 0.19 54.91
## 290 LP002115 1 0.19 55.10
## 291 LP002116 1 0.19 55.29
## 292 LP002119 1 0.19 55.48
## 293 LP002126 1 0.19 55.67
## 294 LP002128 1 0.19 55.86
## 295 LP002129 1 0.19 56.05
## 296 LP002130 1 0.19 56.24
## 297 LP002131 1 0.19 56.43
## 298 LP002138 1 0.19 56.62
## 299 LP002139 1 0.19 56.81
## 300 LP002140 1 0.19 57.00
## 301 LP002141 1 0.19 57.19
## 302 LP002142 1 0.19 57.38
## 303 LP002143 1 0.19 57.57
## 304 LP002144 1 0.19 57.76
## 305 LP002149 1 0.19 57.95
## 306 LP002151 1 0.19 58.14
## 307 LP002158 1 0.19 58.33
## 308 LP002160 1 0.19 58.52
## 309 LP002161 1 0.19 58.71
## 310 LP002170 1 0.19 58.90
## 311 LP002175 1 0.19 59.09
## 312 LP002180 1 0.19 59.28
## 313 LP002181 1 0.19 59.47
## 314 LP002187 1 0.19 59.66
## 315 LP002190 1 0.19 59.85
## 316 LP002191 1 0.19 60.04
## 317 LP002194 1 0.19 60.23
## 318 LP002197 1 0.19 60.42
## 319 LP002201 1 0.19 60.61
## 320 LP002205 1 0.19 60.80
## 321 LP002209 1 0.19 60.99
## 322 LP002211 1 0.19 61.18
## 323 LP002219 1 0.19 61.37
## 324 LP002224 1 0.19 61.56
## 325 LP002225 1 0.19 61.75
## 326 LP002226 1 0.19 61.94
## 327 LP002229 1 0.19 62.13
## 328 LP002231 1 0.19 62.32
## 329 LP002234 1 0.19 62.51
## 330 LP002236 1 0.19 62.70
## 331 LP002237 1 0.19 62.89
## 332 LP002239 1 0.19 63.08
## 333 LP002244 1 0.19 63.27
## 334 LP002250 1 0.19 63.46
## 335 LP002255 1 0.19 63.65
## 336 LP002262 1 0.19 63.84
## 337 LP002265 1 0.19 64.03
## 338 LP002266 1 0.19 64.22
## 339 LP002277 1 0.19 64.41
## 340 LP002281 1 0.19 64.60
## 341 LP002284 1 0.19 64.79
## 342 LP002287 1 0.19 64.98
## 343 LP002288 1 0.19 65.17
## 344 LP002296 1 0.19 65.36
## 345 LP002297 1 0.19 65.55
## 346 LP002300 1 0.19 65.74
## 347 LP002301 1 0.19 65.93
## 348 LP002305 1 0.19 66.12
## 349 LP002308 1 0.19 66.31
## 350 LP002314 1 0.19 66.50
## 351 LP002315 1 0.19 66.69
## 352 LP002317 1 0.19 66.88
## 353 LP002318 1 0.19 67.07
## 354 LP002328 1 0.19 67.26
## 355 LP002332 1 0.19 67.45
## 356 LP002335 1 0.19 67.64
## 357 LP002337 1 0.19 67.83
## 358 LP002341 1 0.19 68.02
## 359 LP002342 1 0.19 68.21
## 360 LP002345 1 0.19 68.40
## 361 LP002347 1 0.19 68.59
## 362 LP002348 1 0.19 68.78
## 363 LP002361 1 0.19 68.97
## 364 LP002364 1 0.19 69.16
## 365 LP002366 1 0.19 69.35
## 366 LP002367 1 0.19 69.54
## 367 LP002368 1 0.19 69.73
## 368 LP002369 1 0.19 69.92
## 369 LP002370 1 0.19 70.11
## 370 LP002377 1 0.19 70.30
## 371 LP002379 1 0.19 70.49
## 372 LP002386 1 0.19 70.68
## 373 LP002387 1 0.19 70.87
## 374 LP002390 1 0.19 71.06
## 375 LP002398 1 0.19 71.25
## 376 LP002403 1 0.19 71.44
## 377 LP002407 1 0.19 71.63
## 378 LP002408 1 0.19 71.82
## 379 LP002409 1 0.19 72.01
## 380 LP002418 1 0.19 72.20
## 381 LP002422 1 0.19 72.39
## 382 LP002429 1 0.19 72.58
## 383 LP002434 1 0.19 72.77
## 384 LP002435 1 0.19 72.96
## 385 LP002443 1 0.19 73.15
## 386 LP002446 1 0.19 73.34
## 387 LP002448 1 0.19 73.53
## 388 LP002449 1 0.19 73.72
## 389 LP002453 1 0.19 73.91
## 390 LP002455 1 0.19 74.10
## 391 LP002459 1 0.19 74.29
## 392 LP002467 1 0.19 74.48
## 393 LP002472 1 0.19 74.67
## 394 LP002473 1 0.19 74.86
## 395 LP002484 1 0.19 75.05
## 396 LP002487 1 0.19 75.24
## 397 LP002489 1 0.19 75.43
## 398 LP002493 1 0.19 75.62
## 399 LP002494 1 0.19 75.81
## 400 LP002500 1 0.19 76.00
## 401 LP002501 1 0.19 76.19
## 402 LP002502 1 0.19 76.38
## 403 LP002505 1 0.19 76.57
## 404 LP002515 1 0.19 76.76
## 405 LP002517 1 0.19 76.95
## 406 LP002519 1 0.19 77.14
## 407 LP002524 1 0.19 77.33
## 408 LP002527 1 0.19 77.52
## 409 LP002529 1 0.19 77.71
## 410 LP002530 1 0.19 77.90
## 411 LP002531 1 0.19 78.09
## 412 LP002534 1 0.19 78.28
## 413 LP002536 1 0.19 78.47
## 414 LP002537 1 0.19 78.66
## 415 LP002541 1 0.19 78.85
## 416 LP002543 1 0.19 79.04
## 417 LP002544 1 0.19 79.23
## 418 LP002545 1 0.19 79.42
## 419 LP002547 1 0.19 79.61
## 420 LP002555 1 0.19 79.80
## 421 LP002556 1 0.19 79.99
## 422 LP002571 1 0.19 80.18
## 423 LP002582 1 0.19 80.37
## 424 LP002585 1 0.19 80.56
## 425 LP002586 1 0.19 80.75
## 426 LP002587 1 0.19 80.94
## 427 LP002600 1 0.19 81.13
## 428 LP002602 1 0.19 81.32
## 429 LP002603 1 0.19 81.51
## 430 LP002606 1 0.19 81.70
## 431 LP002615 1 0.19 81.89
## 432 LP002619 1 0.19 82.08
## 433 LP002622 1 0.19 82.27
## 434 LP002625 1 0.19 82.46
## 435 LP002626 1 0.19 82.65
## 436 LP002634 1 0.19 82.84
## 437 LP002637 1 0.19 83.03
## 438 LP002640 1 0.19 83.22
## 439 LP002643 1 0.19 83.41
## 440 LP002648 1 0.19 83.60
## 441 LP002652 1 0.19 83.79
## 442 LP002659 1 0.19 83.98
## 443 LP002670 1 0.19 84.17
## 444 LP002682 1 0.19 84.36
## 445 LP002683 1 0.19 84.55
## 446 LP002684 1 0.19 84.74
## 447 LP002689 1 0.19 84.93
## 448 LP002690 1 0.19 85.12
## 449 LP002692 1 0.19 85.31
## 450 LP002693 1 0.19 85.50
## 451 LP002699 1 0.19 85.69
## 452 LP002705 1 0.19 85.88
## 453 LP002706 1 0.19 86.07
## 454 LP002714 1 0.19 86.26
## 455 LP002716 1 0.19 86.45
## 456 LP002720 1 0.19 86.64
## 457 LP002723 1 0.19 86.83
## 458 LP002731 1 0.19 87.02
## 459 LP002732 1 0.19 87.21
## 460 LP002734 1 0.19 87.40
## 461 LP002738 1 0.19 87.59
## 462 LP002739 1 0.19 87.78
## 463 LP002740 1 0.19 87.97
## 464 LP002741 1 0.19 88.16
## 465 LP002743 1 0.19 88.35
## 466 LP002753 1 0.19 88.54
## 467 LP002755 1 0.19 88.73
## 468 LP002767 1 0.19 88.92
## 469 LP002768 1 0.19 89.11
## 470 LP002772 1 0.19 89.30
## 471 LP002776 1 0.19 89.49
## 472 LP002777 1 0.19 89.68
## 473 LP002785 1 0.19 89.87
## 474 LP002788 1 0.19 90.06
## 475 LP002789 1 0.19 90.25
## 476 LP002792 1 0.19 90.44
## 477 LP002795 1 0.19 90.63
## 478 LP002798 1 0.19 90.82
## 479 LP002804 1 0.19 91.01
## 480 LP002807 1 0.19 91.20
## 481 LP002813 1 0.19 91.39
## 482 LP002820 1 0.19 91.58
## 483 LP002821 1 0.19 91.77
## 484 LP002832 1 0.19 91.96
## 485 LP002836 1 0.19 92.15
## 486 LP002837 1 0.19 92.34
## 487 LP002840 1 0.19 92.53
## 488 LP002841 1 0.19 92.72
## 489 LP002842 1 0.19 92.91
## 490 LP002847 1 0.19 93.10
## 491 LP002855 1 0.19 93.29
## 492 LP002862 1 0.19 93.48
## 493 LP002863 1 0.19 93.67
## 494 LP002868 1 0.19 93.86
## 495 LP002872 1 0.19 94.05
## 496 LP002874 1 0.19 94.24
## 497 LP002877 1 0.19 94.43
## 498 LP002888 1 0.19 94.62
## 499 LP002892 1 0.19 94.81
## 500 LP002893 1 0.19 95.00
## 501 LP002894 1 0.19 95.19
## 502 LP002911 1 0.19 95.38
## 503 LP002912 1 0.19 95.57
## 504 LP002916 1 0.19 95.76
## 505 LP002917 1 0.19 95.95
## 506 LP002925 1 0.19 96.14
## 507 LP002926 1 0.19 96.33
## 508 LP002928 1 0.19 96.52
## 509 LP002931 1 0.19 96.71
## 510 LP002933 1 0.19 96.90
## 511 LP002936 1 0.19 97.09
## 512 LP002938 1 0.19 97.28
## 513 LP002940 1 0.19 97.47
## 514 LP002941 1 0.19 97.66
## 515 LP002943 1 0.19 97.85
## 516 LP002945 1 0.19 98.04
## 517 LP002948 1 0.19 98.23
## 518 LP002950 1 0.19 98.42
## 519 LP002953 1 0.19 98.61
## 520 LP002958 1 0.19 98.80
## 521 LP002959 1 0.19 98.99
## 522 LP002961 1 0.19 99.18
## 523 LP002964 1 0.19 99.37
## 524 LP002974 1 0.19 99.56
## 525 LP002978 1 0.19 99.75
## 526 LP002979 1 0.19 99.94
## 527 LP002983 1 0.19 100.13
## 528 LP002984 1 0.19 100.32
## 529 LP002990 1 0.19 100.00
## Gender frequency percentage cumulative_perc
## 1 Male 422 79.77 79.77
## 2 Female 95 17.96 97.73
## 3 12 2.27 100.00
## Married frequency percentage cumulative_perc
## 1 Yes 339 64.08 64.08
## 2 No 188 35.54 99.62
## 3 2 0.38 100.00
## Dependents frequency percentage cumulative_perc
## 1 0 295 55.77 55.77
## 2 2 92 17.39 73.16
## 3 1 85 16.07 89.23
## 4 3+ 45 8.51 97.74
## 5 12 2.27 100.00
## Education frequency percentage cumulative_perc
## 1 Graduate 421 79.58 79.58
## 2 Not Graduate 108 20.42 100.00
## Self_Employed frequency percentage cumulative_perc
## 1 No 434 82.04 82.04
## 2 Yes 70 13.23 95.27
## 3 25 4.73 100.00
## Property_Area frequency percentage cumulative_perc
## 1 Semiurban 209 39.51 39.51
## 2 Urban 165 31.19 70.70
## 3 Rural 155 29.30 100.00
## Loan_Status frequency percentage cumulative_perc
## 1 Y 366 69.19 69.19
## 2 N 163 30.81 100.00
## data
##
## 13 Variables 529 Observations
## --------------------------------------------------------------------------------
## Loan_ID
## n missing distinct
## 529 0 529
##
## lowest : LP001003 LP001005 LP001006 LP001008 LP001011
## highest: LP002978 LP002979 LP002983 LP002984 LP002990
## --------------------------------------------------------------------------------
## Gender
## n missing distinct
## 517 12 2
##
## Value Female Male
## Frequency 95 422
## Proportion 0.184 0.816
## --------------------------------------------------------------------------------
## Married
## n missing distinct
## 527 2 2
##
## Value No Yes
## Frequency 188 339
## Proportion 0.357 0.643
## --------------------------------------------------------------------------------
## Dependents
## n missing distinct
## 517 12 4
##
## Value 0 1 2 3+
## Frequency 295 85 92 45
## Proportion 0.571 0.164 0.178 0.087
## --------------------------------------------------------------------------------
## Education
## n missing distinct
## 529 0 2
##
## Value Graduate Not Graduate
## Frequency 421 108
## Proportion 0.796 0.204
## --------------------------------------------------------------------------------
## Self_Employed
## n missing distinct
## 504 25 2
##
## Value No Yes
## Frequency 434 70
## Proportion 0.861 0.139
## --------------------------------------------------------------------------------
## ApplicantIncome
## n missing distinct Info Mean Gmd .05 .10
## 529 0 442 1 5508 4325 1927 2275
## .25 .50 .75 .90 .95
## 2900 3816 5815 9542 14643
##
## lowest : 150 210 645 674 1000, highest: 39147 39999 51763 63337 81000
## --------------------------------------------------------------------------------
## CoapplicantIncome
## n missing distinct Info Mean Gmd .05 .10
## 529 0 245 0.909 1542 2001 0 0
## .25 .50 .75 .90 .95
## 0 1086 2232 3670 4890
##
## lowest : 0.00 16.12 189.00 240.00 242.00
## highest: 8980.00 10968.00 11300.00 20000.00 33837.00
## --------------------------------------------------------------------------------
## LoanAmount
## n missing distinct Info Mean Gmd .05 .10
## 529 0 194 1 145.9 78.82 55.0 71.0
## .25 .50 .75 .90 .95
## 100.0 128.0 167.0 234.4 294.4
##
## lowest : 9 17 25 26 30, highest: 496 500 570 600 700
## --------------------------------------------------------------------------------
## Loan_Amount_Term
## n missing distinct Info Mean Gmd
## 529 0 9 0.376 342.4 43.83
##
## lowest : 36 60 84 120 180, highest: 180 240 300 360 480
##
## Value 36 60 84 120 180 240 300 360 480
## Frequency 2 2 3 3 41 2 10 452 14
## Proportion 0.004 0.004 0.006 0.006 0.078 0.004 0.019 0.854 0.026
## --------------------------------------------------------------------------------
## Credit_History
## n missing distinct Info Sum Mean Gmd
## 529 0 2 0.381 450 0.8507 0.2546
##
## --------------------------------------------------------------------------------
## Property_Area
## n missing distinct
## 529 0 3
##
## Value Rural Semiurban Urban
## Frequency 155 209 165
## Proportion 0.293 0.395 0.312
## --------------------------------------------------------------------------------
## Loan_Status
## n missing distinct
## 529 0 2
##
## Value N Y
## Frequency 163 366
## Proportion 0.308 0.692
## --------------------------------------------------------------------------------
Tugas 4
Lakukan pemeriksaan distribusi densitas pada setiap variabel kuantitatif menggunakan R dan Python dengan beberapa bagian sebagai berikut:
4.1 Univariat numerik
4.1.1 ApplicantIncome
# Density check for ApplicantIncome: a histogram on the density scale with a
# normal curve laid over it, followed by three shaded normal-probability plots.
library(visualize)

df <- read.csv("loan-train.csv")

# Estimate the normal parameters from the very column that is plotted, so the
# overlay always matches the histogram; na.rm guards against missing values.
income_mean <- mean(df$ApplicantIncome, na.rm = TRUE)
income_sd <- sd(df$ApplicantIncome, na.rm = TRUE)

hist(df$ApplicantIncome,
  main = "",
  col = "blue",
  freq = FALSE,
  xlab = ""
)
curve(dnorm(x, mean = income_mean, sd = income_sd),
  add = TRUE,
  col = "black",
  lwd = 3
)

# Draw the lower / bounded / upper probability plots on a 2 x 2 grid, then put
# the previous graphics settings back so later plots are not affected.
old_par <- par(mfrow = c(2, 2))
visualize.norm(
  stat = 1, mu = income_mean, sd = income_sd, section = "lower"
)
visualize.norm(
  stat = c(3, 3000), mu = income_mean, sd = income_sd, section = "bounded"
)
visualize.norm(
  stat = 1, mu = income_mean, sd = income_sd, section = "upper"
)
par(old_par)
4.1.2 LoanAmount
# Density check for LoanAmount: a histogram on the density scale with a normal
# curve laid over it, followed by three shaded normal-probability plots.
library(visualize)

df <- read.csv("loan-train.csv")

# Estimate the normal parameters from the very column that is plotted, so the
# overlay always matches the histogram. LoanAmount contains missing values in
# the raw file, hence na.rm = TRUE.
loan_mean <- mean(df$LoanAmount, na.rm = TRUE)
loan_sd <- sd(df$LoanAmount, na.rm = TRUE)

hist(df$LoanAmount,
  main = "",
  col = "blue",
  freq = FALSE,
  xlab = ""
)
curve(dnorm(x, mean = loan_mean, sd = loan_sd),
  add = TRUE,
  col = "black",
  lwd = 3
)

# Draw the lower / bounded / upper probability plots on a 2 x 2 grid, then put
# the previous graphics settings back so later plots are not affected.
old_par <- par(mfrow = c(2, 2))
visualize.norm(
  stat = 1, mu = loan_mean, sd = loan_sd, section = "lower"
)
visualize.norm(
  stat = c(3, 50), mu = loan_mean, sd = loan_sd, section = "bounded"
)
visualize.norm(
  stat = 1, mu = loan_mean, sd = loan_sd, section = "upper"
)
par(old_par)
4.1.3 Credit_History
# Density check for Credit_History: a histogram on the density scale with a
# normal curve laid over it, followed by three shaded normal-probability plots.
library(visualize)

df <- read.csv("loan-train.csv")

# Estimate the normal parameters from the very column that is plotted, so the
# overlay always matches the histogram. Credit_History contains missing values
# in the raw file, hence na.rm = TRUE.
credit_mean <- mean(df$Credit_History, na.rm = TRUE)
credit_sd <- sd(df$Credit_History, na.rm = TRUE)

hist(df$Credit_History,
  main = "",
  col = "blue",
  freq = FALSE,
  xlab = ""
)
curve(dnorm(x, mean = credit_mean, sd = credit_sd),
  add = TRUE,
  col = "black",
  lwd = 3
)

# Draw the lower / bounded / upper probability plots on a 2 x 2 grid, then put
# the previous graphics settings back so later plots are not affected.
old_par <- par(mfrow = c(2, 2))
visualize.norm(
  stat = 1, mu = credit_mean, sd = credit_sd, section = "lower"
)
visualize.norm(
  stat = c(0.3, 0.6), mu = credit_mean, sd = credit_sd, section = "bounded"
)
visualize.norm(
  stat = 1, mu = credit_mean, sd = credit_sd, section = "upper"
)
par(old_par)
4.1.4 CoapplicantIncome
# Density check for CoapplicantIncome: a histogram on the density scale with a
# normal curve laid over it, followed by three shaded normal-probability plots.
library(visualize)

df <- read.csv("loan-train.csv")

# Estimate the normal parameters from the very column that is plotted, so the
# overlay always matches the histogram; na.rm guards against missing values.
coapp_mean <- mean(df$CoapplicantIncome, na.rm = TRUE)
coapp_sd <- sd(df$CoapplicantIncome, na.rm = TRUE)

hist(df$CoapplicantIncome,
  main = "",
  col = "blue",
  freq = FALSE,
  xlab = ""
)
curve(dnorm(x, mean = coapp_mean, sd = coapp_sd),
  add = TRUE,
  col = "black",
  lwd = 3
)

# Draw the lower / bounded / upper probability plots on a 2 x 2 grid, then put
# the previous graphics settings back so later plots are not affected.
old_par <- par(mfrow = c(2, 2))
visualize.norm(
  stat = 1, mu = coapp_mean, sd = coapp_sd, section = "lower"
)
visualize.norm(
  stat = c(3, 2000), mu = coapp_mean, sd = coapp_sd, section = "bounded"
)
visualize.norm(
  stat = 1, mu = coapp_mean, sd = coapp_sd, section = "upper"
)
par(old_par)
4.1.5 Loan_Amount_Term
# Density check for Loan_Amount_Term: a histogram on the density scale with a
# normal curve laid over it, followed by three shaded normal-probability plots.
library(visualize)

df <- read.csv("loan-train.csv")

# Estimate the normal parameters from the very column that is plotted, so the
# overlay always matches the histogram. Loan_Amount_Term contains missing
# values in the raw file, hence na.rm = TRUE.
term_mean <- mean(df$Loan_Amount_Term, na.rm = TRUE)
term_sd <- sd(df$Loan_Amount_Term, na.rm = TRUE)

hist(df$Loan_Amount_Term,
  main = "",
  col = "blue",
  freq = FALSE,
  xlab = ""
)
curve(dnorm(x, mean = term_mean, sd = term_sd),
  add = TRUE,
  col = "black",
  lwd = 3
)

# Draw the lower / bounded / upper probability plots on a 2 x 2 grid, then put
# the previous graphics settings back so later plots are not affected.
old_par <- par(mfrow = c(2, 2))
visualize.norm(
  stat = 250, mu = term_mean, sd = term_sd, section = "lower"
)
visualize.norm(
  stat = c(300, 400), mu = term_mean, sd = term_sd, section = "bounded"
)
visualize.norm(
  stat = 250, mu = term_mean, sd = term_sd, section = "upper"
)
par(old_par)
4.2 Bivariat numerik
4.2.1 ApplicantIncome dan CoapplicantIncome
# ggplot2 is only attached further down in this part of the report; attaching
# it here as well is harmless and lets this chunk run on its own.
library(ggplot2)

# Scatter plot of ApplicantIncome against CoapplicantIncome with 2-D density
# contours drawn over the points.
df1 <- ggplot(df, aes(x = ApplicantIncome, y = CoapplicantIncome)) +
  geom_point(alpha = 0.5) +
  geom_density_2d()
df1
4.2.2 ApplicantIncome dan LoanAmount
# Scatter plot of ApplicantIncome against LoanAmount with 2-D density contours
# drawn over the points. LoanAmount has missing values in the raw file, so
# na.rm = TRUE makes their removal explicit instead of leaving a warning.
df1 <- ggplot(df, aes(x = ApplicantIncome, y = LoanAmount)) +
  geom_point(alpha = 0.5, na.rm = TRUE) +
  geom_density_2d(na.rm = TRUE)
df1
4.2.3 CoapplicantIncome dan LoanAmount
# Scatter plot of CoapplicantIncome against LoanAmount with 2-D density
# contours drawn over the points. LoanAmount has missing values in the raw
# file, so na.rm = TRUE makes their removal explicit instead of leaving a
# warning.
df1 <- ggplot(df, aes(x = CoapplicantIncome, y = LoanAmount)) +
  geom_point(alpha = 0.5, na.rm = TRUE) +
  geom_density_2d(na.rm = TRUE)
df1
4.3 Multivariat numerik
library(carData) # kept from the original; no carData dataset is read below
library(ggplot2) # for visualization

# `df` is the loan data frame read from "loan-train.csv", not a dataset that
# ships with carData. The former data(df, package = "carData") call therefore
# could only emit a "data set not found" warning and has been removed.

# Credit_History is a 0/1 indicator, so it is mapped as a factor to get two
# discrete colours rather than a continuous gradient. na.rm = TRUE makes the
# removal of rows with a missing LoanAmount explicit.
ggplot(df, aes(
  x = ApplicantIncome,
  y = LoanAmount,
  color = factor(Credit_History)
)) +
  geom_point(na.rm = TRUE) +
  theme_minimal() +
  labs(
    title = "Loan Amount by Applicant Income and Credit History",
    color = "Credit_History"
  )
Tugas 5
Lakukan proses pengujian Hipotesis menggunakan R dan Python pada setiap variabel kuantitatif dengan beberapa bagian sebagai berikut:
5.1 Margin of Error dan Estimasi Interval
- Hitunglah margin of error dan estimasi interval untuk proporsi peminjam berjenis kelamin perempuan pada tingkat kepercayaan 95%.
library(MASS)

# Sample proportion of applicants recorded as "Female" and the standard error
# of that proportion. The denominator is the length of the Gender column.
n <- length(df$Gender)
k <- sum(df$Gender == "Female")
pbar <- k / n
SE <- sqrt(pbar * (1 - pbar) / n)
SE
## [1] 0.01558505
5.1.1 Margin of error
# Margin of error at the 95% confidence level: the 0.975 standard-normal
# quantile multiplied by the standard error.
E <- SE * qnorm(0.975)
E
## [1] 0.03054614
5.1.2 Estimasi Interval
library(stats)

# Interval estimate for the proportion of female applicants; the output below
# reports a 95 percent interval computed with continuity correction.
prop.test(x = k, n = n)
##
## 1-sample proportions test with continuity correction
##
## data: k out of n, null probability 0.5
## X-squared = 246.45, df = 1, p-value < 2.2e-16
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
## 0.1531133 0.2157616
## sample estimates:
## p
## 0.1824104
5.2 Ukuran Sampel
- Jika Anda berencana menggunakan perkiraan proporsi 50% data konsumen berjenis kelamin perempuan, temukan ukuran sampel yang diperlukan untuk mencapai margin kesalahan 5% untuk data observasi pada tingkat kepercayaan 95%.
# Sample size needed to reach margin of error E at 95% confidence when the
# planning proportion is p:  n = z^2 * p * (1 - p) / E^2.
zstar <- qnorm(0.975)
p <- 0.5
E <- 0.05
n_required <- zstar^2 * p * (1 - p) / E^2
n_required
## [1] 384.1459
# A sample size has to be a whole number of observations, so round up.
ceiling(n_required)
## [1] 385
5.3 Pembuktian Kebenaran
- Lakukan pembuktian kebenaran asumsi dengan tingkat signifikansi 0.05, jika Bank mengklaim bahwa pinjaman rata-rata konsumen adalah:
5.3.1 Lebih besar $ 150
df <- read.csv("loan-train.csv")
df <- na.omit(df)

# Take the mean, the standard deviation and the sample size from one and the
# same vector, so the test statistic can never mix rows from two different
# data frames.
loan <- df$LoanAmount

mu0 <- 150 # hypothesized value
xbar <- mean(loan) # sample mean
sigma <- sd(loan) # sample standard deviation
n <- length(loan) # sample size
z <- (xbar - mu0) / (sigma / sqrt(n)) # test statistic
z
## [1] -1.134147
alpha <- .05 # .05 significance level
z.alpha <- qnorm(1 - alpha) # right tail critical value
z.alpha
## [1] 1.644854
# Right-tailed decision rule: the claim "mean > 150" is supported only when
# z > z.alpha. The printed z is below the critical value, so the data do not
# support the claim at the .05 level.
5.3.2 Lebih kecil $ 150
# Left tail critical value: in the left-tailed test the claim "mean < 150" is
# supported only when the test statistic z falls below this value.
-z.alpha
## [1] -1.644854
5.3.3 Sama dengan $ 150
# Two-tailed test at the .05 level: .025 is placed in each tail, and
# "mean = 150" is rejected only when z lies outside the two bounds below.
alpha <- .05
z.half.alpha <- qnorm(1 - alpha / 2)
c(-z.half.alpha, z.half.alpha)
## [1] -1.959964 1.959964
5.4 Pembuktian Kebenaran Jika std 85
- Lakukan pembuktian kebenaran asumsi dengan tingkat signifikansi 0.05, seperti di atas, jika diketahui simpangan baku pinjaman adalah $ 85.
5.4.1 Lebih besar $ 150
df <- read.csv("loan-train.csv")
df <- na.omit(df)

# Take the mean and the sample size from one and the same vector, so the test
# statistic can never mix rows from two different data frames.
loan <- df$LoanAmount

mu0 <- 150 # hypothesized value
xbar <- mean(loan) # sample mean
sigma <- 85 # known (given) population standard deviation
n <- length(loan) # sample size
z <- (xbar - mu0) / (sigma / sqrt(n)) # test statistic
z
## [1] -1.122251
alpha <- .05 # .05 significance level
z.alpha <- qnorm(1 - alpha) # right tail critical value
z.alpha
## [1] 1.644854
# Right-tailed decision rule: the claim "mean > 150" is supported only when
# z > z.alpha. The printed z is below the critical value, so the data do not
# support the claim at the .05 level.
5.4.2 Lebih kecil $ 150
# Left tail critical value: in the left-tailed test the claim "mean < 150" is
# supported only when the test statistic z falls below this value.
-z.alpha
## [1] -1.644854
5.4.3 Sama dengan $ 150
# Two-tailed test at the .05 level: .025 is placed in each tail, and
# "mean = 150" is rejected only when z lies outside the two bounds below.
alpha <- .05
z.half.alpha <- qnorm(1 - alpha / 2)
c(-z.half.alpha, z.half.alpha)
## [1] -1.959964 1.959964