# Clear environment of variables and functions
rm(list = ls(all = TRUE))
# Clear environmet of packages
if(is.null(sessionInfo()$otherPkgs) == FALSE)lapply(paste("package:", names(sessionInfo()$otherPkgs), sep=""), detach, character.only = TRUE, unload = TRUE)
#load package
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1 ✔ purrr 0.3.2
## ✔ tibble 2.1.3 ✔ dplyr 0.8.3
## ✔ tidyr 1.0.0 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
##
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
##
## nasa
library(janitor) # for tyble
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(lmPerm) # for ANOVA
library(formattable)# For table formatting and table formatting functions
library(htmltools)
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
library(MultinomialCI) # To calculate multinomial confidence intervals for factor variables
library(flexdashboard)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:Hmisc':
##
## subplot
## The following object is masked from 'package:formattable':
##
## style
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(dygraphs)
library(xts) # to convert date data to xts data, xts is time series class
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Registered S3 method overwritten by 'xts':
## method from
## as.zoo.xts zoo
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
library(gganimate)
library(inspectdf)# Load auto EDA packages
#Import Data
df <- read.csv("MLD_Data_File.csv", header=TRUE)
#Look at the first 10 rows of data
df$LOANPRC = df$LOANPRC*100
head(df,10)
## MARRIED GDLIN OBRAT BLACK HISPAN MALE APPROVE LOANPRC
## 1 0 1 34.5 0 0 . 1 75.42373
## 2 1 1 34.1 0 0 1 0 80.00000
## 3 0 1 26.0 0 0 1 1 89.51049
## 4 1 1 37.0 0 0 1 1 60.00000
## 5 1 1 32.1 0 0 1 1 89.55224
## 6 0 1 33.0 0 0 1 1 80.43478
## 7 0 1 36.0 0 0 1 1 89.80892
## 8 0 1 37.0 0 0 1 1 79.76879
## 9 1 1 30.7 0 0 1 1 100.30090
## 10 1 1 49.0 0 0 1 1 63.27014
df <- subset(df,MARRIED != "." & LOANPRC <= 100 & (GDLIN == 1 | GDLIN == 0))
head(df,10)
## MARRIED GDLIN OBRAT BLACK HISPAN MALE APPROVE LOANPRC
## 1 0 1 34.5 0 0 . 1 75.42373
## 2 1 1 34.1 0 0 1 0 80.00000
## 3 0 1 26.0 0 0 1 1 89.51049
## 4 1 1 37.0 0 0 1 1 60.00000
## 5 1 1 32.1 0 0 1 1 89.55224
## 6 0 1 33.0 0 0 1 1 80.43478
## 7 0 1 36.0 0 0 1 1 89.80892
## 8 0 1 37.0 0 0 1 1 79.76879
## 10 1 1 49.0 0 0 1 1 63.27014
## 11 1 1 37.0 0 0 1 1 50.47591
summary(df)
## MARRIED GDLIN OBRAT BLACK
## .: 0 Min. :0.0000 Min. : 0.00 Min. :0.00000
## 0: 669 1st Qu.:1.0000 1st Qu.:28.00 1st Qu.:0.00000
## 1:1283 Median :1.0000 Median :33.00 Median :0.00000
## Mean :0.9144 Mean :32.37 Mean :0.09939
## 3rd Qu.:1.0000 3rd Qu.:37.00 3rd Qu.:0.00000
## Max. :1.0000 Max. :95.00 Max. :1.00000
## HISPAN MALE APPROVE LOANPRC
## Min. :0.0000 .: 15 Min. :0.0000 Min. : 2.105
## 1st Qu.:0.0000 0: 361 1st Qu.:1.0000 1st Qu.: 69.750
## Median :0.0000 1:1576 Median :1.0000 Median : 80.000
## Mean :0.0543 Mean :0.8796 Mean : 76.072
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.: 89.820
## Max. :1.0000 Max. :1.0000 Max. :100.000
#df$MARRIED <- as.double(df$MARRIED)
#df$MALE <- as.double(df$MALE)
#head(df,10)
#change column names to the easy understood ones
colnames(df) = c('MARRIED','Meet_Guidelines','Obligation','BLACK','HISPAN','MALE','APPROVE','Loan_Purchase')
head(df,10)
## MARRIED Meet_Guidelines Obligation BLACK HISPAN MALE APPROVE
## 1 0 1 34.5 0 0 . 1
## 2 1 1 34.1 0 0 1 0
## 3 0 1 26.0 0 0 1 1
## 4 1 1 37.0 0 0 1 1
## 5 1 1 32.1 0 0 1 1
## 6 0 1 33.0 0 0 1 1
## 7 0 1 36.0 0 0 1 1
## 8 0 1 37.0 0 0 1 1
## 10 1 1 49.0 0 0 1 1
## 11 1 1 37.0 0 0 1 1
## Loan_Purchase
## 1 75.42373
## 2 80.00000
## 3 89.51049
## 4 60.00000
## 5 89.55224
## 6 80.43478
## 7 89.80892
## 8 79.76879
## 10 63.27014
## 11 50.47591
summary(df)
## MARRIED Meet_Guidelines Obligation BLACK
## .: 0 Min. :0.0000 Min. : 0.00 Min. :0.00000
## 0: 669 1st Qu.:1.0000 1st Qu.:28.00 1st Qu.:0.00000
## 1:1283 Median :1.0000 Median :33.00 Median :0.00000
## Mean :0.9144 Mean :32.37 Mean :0.09939
## 3rd Qu.:1.0000 3rd Qu.:37.00 3rd Qu.:0.00000
## Max. :1.0000 Max. :95.00 Max. :1.00000
## HISPAN MALE APPROVE Loan_Purchase
## Min. :0.0000 .: 15 Min. :0.0000 Min. : 2.105
## 1st Qu.:0.0000 0: 361 1st Qu.:1.0000 1st Qu.: 69.750
## Median :0.0000 1:1576 Median :1.0000 Median : 80.000
## Mean :0.0543 Mean :0.8796 Mean : 76.072
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.: 89.820
## Max. :1.0000 Max. :1.0000 Max. :100.000
#correlation table
df %>%
select_if(is.numeric) %>%
as.matrix() %>%
rcorr()
## Meet_Guidelines Obligation BLACK HISPAN APPROVE
## Meet_Guidelines 1.00 -0.15 -0.22 -0.04 0.62
## Obligation -0.15 1.00 0.11 0.03 -0.17
## BLACK -0.22 0.11 1.00 -0.08 -0.21
## HISPAN -0.04 0.03 -0.08 1.00 -0.07
## APPROVE 0.62 -0.17 -0.21 -0.07 1.00
## Loan_Purchase -0.13 0.21 0.13 0.11 -0.14
## Loan_Purchase
## Meet_Guidelines -0.13
## Obligation 0.21
## BLACK 0.13
## HISPAN 0.11
## APPROVE -0.14
## Loan_Purchase 1.00
##
## n= 1952
##
##
## P
## Meet_Guidelines Obligation BLACK HISPAN APPROVE
## Meet_Guidelines 0.0000 0.0000 0.0783 0.0000
## Obligation 0.0000 0.0000 0.2364 0.0000
## BLACK 0.0000 0.0000 0.0004 0.0000
## HISPAN 0.0783 0.2364 0.0004 0.0017
## APPROVE 0.0000 0.0000 0.0000 0.0017
## Loan_Purchase 0.0000 0.0000 0.0000 0.0000 0.0000
## Loan_Purchase
## Meet_Guidelines 0.0000
## Obligation 0.0000
## BLACK 0.0000
## HISPAN 0.0000
## APPROVE 0.0000
## Loan_Purchase
## correlation graph
df %>%
select(MARRIED,Obligation,MALE,APPROVE,Loan_Purchase, HISPAN, BLACK) %>%
inspect_cor() %>%
show_plot()
library(DataExplorer)
df %>%
plot_bar()
Comments:
“.” appears in Married and Male
df %>%
plot_histogram()
comments: 1. Loan purchase value shouldn’t over 1
df_white <- subset(df, MALE != "." & HISPAN == 0 & BLACK == 0)
head(df_white,10)
## MARRIED Meet_Guidelines Obligation BLACK HISPAN MALE APPROVE
## 2 1 1 34.1 0 0 1 0
## 3 0 1 26.0 0 0 1 1
## 4 1 1 37.0 0 0 1 1
## 5 1 1 32.1 0 0 1 1
## 6 0 1 33.0 0 0 1 1
## 7 0 1 36.0 0 0 1 1
## 8 0 1 37.0 0 0 1 1
## 10 1 1 49.0 0 0 1 1
## 11 1 1 37.0 0 0 1 1
## 12 1 1 37.1 0 0 1 1
## Loan_Purchase
## 2 80.00000
## 3 89.51049
## 4 60.00000
## 5 89.55224
## 6 80.43478
## 7 89.80892
## 8 79.76879
## 10 63.27014
## 11 50.47591
## 12 70.37037
df_hisp <- subset(df, MALE != "." & HISPAN == 1)
head(df_hisp,10)
## MARRIED Meet_Guidelines Obligation BLACK HISPAN MALE APPROVE
## 30 1 1 34.8 0 1 1 1
## 49 1 0 30.0 0 1 1 0
## 54 0 1 38.0 0 1 0 1
## 56 0 1 25.0 0 1 1 1
## 69 1 0 22.0 0 1 1 1
## 74 1 1 39.7 0 1 1 1
## 100 1 1 22.0 0 1 0 1
## 111 1 1 36.0 0 1 0 1
## 116 0 1 32.2 0 1 0 1
## 120 1 1 19.0 0 1 1 0
## Loan_Purchase
## 30 80.00000
## 49 94.91525
## 54 89.39394
## 56 79.20792
## 69 80.00000
## 74 84.06780
## 100 85.33334
## 111 90.00000
## 116 90.30303
## 120 76.31161
df_black <- subset(df,MALE != "." & BLACK == 1)
head(df_black,10)
## MARRIED Meet_Guidelines Obligation BLACK HISPAN MALE APPROVE
## 16 1 1 37.1 1 0 1 1
## 27 1 1 34.0 1 0 1 1
## 35 0 0 38.2 1 0 0 0
## 36 0 1 27.0 1 0 0 1
## 46 1 0 34.0 1 0 1 0
## 50 0 1 38.2 1 0 1 1
## 51 1 1 35.0 1 0 1 1
## 80 0 1 34.0 1 0 0 1
## 81 1 1 38.9 1 0 1 1
## 86 0 1 25.0 1 0 0 1
## Loan_Purchase
## 16 89.74359
## 27 80.00000
## 35 95.00000
## 36 75.15528
## 46 80.00000
## 50 67.82609
## 51 95.56962
## 80 88.59060
## 81 95.29412
## 86 80.00000
summary (df_white)
## MARRIED Meet_Guidelines Obligation BLACK HISPAN
## .: 0 Min. :0.0000 Min. : 0.00 Min. :0 Min. :0
## 0: 557 1st Qu.:1.0000 1st Qu.:27.60 1st Qu.:0 1st Qu.:0
## 1:1084 Median :1.0000 Median :32.50 Median :0 Median :0
## Mean :0.9391 Mean :31.99 Mean :0 Mean :0
## 3rd Qu.:1.0000 3rd Qu.:36.50 3rd Qu.:0 3rd Qu.:0
## Max. :1.0000 Max. :95.00 Max. :0 Max. :0
## MALE APPROVE Loan_Purchase
## .: 0 Min. :0.0000 Min. : 2.105
## 0: 291 1st Qu.:1.0000 1st Qu.: 67.708
## 1:1350 Median :1.0000 Median : 79.861
## Mean :0.9098 Mean : 74.782
## 3rd Qu.:1.0000 3rd Qu.: 89.147
## Max. :1.0000 Max. :100.000
grid.arrange(
df_white %>%
ggplot(df_white, mapping = aes(APPROVE))+
geom_bar(aes(y = (..count..)/sum(..count..))) +
geom_text(aes(label = ((..count..)/sum(..count..))*100,
y= ..prop.. ), stat='count',vjust = 1.5, size = 3.0, color = "white") +
scale_y_continuous(labels = scales::percent) +
labs(y= " "),
df_white %>%
filter(df_white$Meet_Guidelines != 666)%>%
ggplot(df_white, mapping = aes(Meet_Guidelines, fill=Meet_Guidelines))+
geom_bar(aes(y = (..count..)/sum(..count..))) +
geom_text(aes(label = ((..count..)/sum(..count..))*100,
y= ..prop.. ), stat='count',vjust = 1, size = 2.9,color = "white" ) +
scale_y_continuous(labels = scales::percent) +
labs(y= " "),
df_white %>%
ggplot(df_white, mapping = aes(MALE))+
geom_bar(aes(y = (..count..)/sum(..count..))) +
geom_text(aes(label = ((..count..)/sum(..count..))*100,
y= ..prop.. ), stat='count',vjust = 3, size = 3.0) +
scale_y_continuous(labels = scales::percent) +
labs(y= " "),
df_white %>%
ggplot(df_white, mapping = aes(MARRIED))+
geom_bar(aes(y = (..count..)/sum(..count..))) +
geom_text(aes(label = ((..count..)/sum(..count..))*100,
y= ..prop.. ), stat='count',vjust = 5, size = 3.0) +
scale_y_continuous(labels = scales::percent) +
labs(y= " ")
, ncol = 2)
summary (df_hisp)
## MARRIED Meet_Guidelines Obligation BLACK HISPAN MALE
## .: 0 Min. :0.0000 Min. :14.60 Min. :0 Min. :1 .: 0
## 0:30 1st Qu.:1.0000 1st Qu.:29.00 1st Qu.:0 1st Qu.:1 0:20
## 1:74 Median :1.0000 Median :33.00 Median :0 Median :1 1:84
## Mean :0.8654 Mean :33.32 Mean :0 Mean :1
## 3rd Qu.:1.0000 3rd Qu.:38.05 3rd Qu.:0 3rd Qu.:1
## Max. :1.0000 Max. :62.00 Max. :0 Max. :1
## APPROVE Loan_Purchase
## Min. :0.0000 Min. : 40.09
## 1st Qu.:1.0000 1st Qu.: 80.00
## Median :1.0000 Median : 89.31
## Mean :0.7788 Mean : 83.91
## 3rd Qu.:1.0000 3rd Qu.: 90.27
## Max. :1.0000 Max. :100.00
grid.arrange(
df_hisp %>%
ggplot(df_hisp, mapping = aes(APPROVE))+
geom_bar(aes(y = (..count..)/sum(..count..))) +
geom_text(aes(label = ((..count..)/sum(..count..))*100,
y= ..prop.. ), stat='count',vjust = 1.5, size = 3.0, color = "white") +
scale_y_continuous(labels = scales::percent) +
labs(y= " "),
df_hisp %>%
ggplot(df_hisp, mapping = aes(Meet_Guidelines, fill=Meet_Guidelines))+
geom_bar(aes(y = (..count..)/sum(..count..))) +
geom_text(aes(label = ((..count..)/sum(..count..))*100,
y= ..prop.. ), stat='count',vjust = 1.5, size = 3.0,color = "white" ) +
scale_y_continuous(labels = scales::percent) +
labs(y= " "),
df_hisp %>%
ggplot(df_hisp, mapping = aes(MALE))+
geom_bar(aes(y = (..count..)/sum(..count..))) +
geom_text(aes(label = ((..count..)/sum(..count..))*100,
y= ..prop.. ), stat='count',vjust = 3, size = 3.0) +
scale_y_continuous(labels = scales::percent) +
labs(y= " "),
df_hisp %>%
ggplot(df_hisp, mapping = aes(MARRIED))+
geom_bar(aes(y = (..count..)/sum(..count..))) +
geom_text(aes(label = ((..count..)/sum(..count..))*100,
y= ..prop.. ), stat='count',vjust = 5, size = 3.0) +
scale_y_continuous(labels = scales::percent) +
labs(y= " ")
, ncol = 2)
summary (df_black)
## MARRIED Meet_Guidelines Obligation BLACK HISPAN MALE
## .: 0 Min. :0.000 Min. : 5.60 Min. :1 Min. :0 .: 0
## 0: 75 1st Qu.:0.000 1st Qu.:31.00 1st Qu.:1 1st Qu.:0 0: 50
## 1:117 Median :1.000 Median :35.00 Median :1 Median :0 1:142
## Mean :0.724 Mean :35.03 Mean :1 Mean :0
## 3rd Qu.:1.000 3rd Qu.:38.90 3rd Qu.:1 3rd Qu.:0
## Max. :1.000 Max. :63.00 Max. :1 Max. :0
## APPROVE Loan_Purchase
## Min. :0.0000 Min. : 28.99
## 1st Qu.:0.0000 1st Qu.: 80.00
## Median :1.0000 Median : 86.06
## Mean :0.6667 Mean : 82.89
## 3rd Qu.:1.0000 3rd Qu.: 90.23
## Max. :1.0000 Max. :100.00
grid.arrange(
df_black %>%
ggplot(df_black, mapping = aes(APPROVE))+
geom_bar(aes(y = (..count..)/sum(..count..))) +
geom_text(aes(label = ((..count..)/sum(..count..))*100,
y= ..prop.. ), stat='count',vjust = 1.5, size = 3.0, color = "white") +
scale_y_continuous(labels = scales::percent) +
labs(y= " "),
df_black %>%
ggplot(df_black, mapping = aes(Meet_Guidelines, fill=Meet_Guidelines))+
geom_bar(aes(y = (..count..)/sum(..count..))) +
geom_text(aes(label = ((..count..)/sum(..count..))*100,
y= ..prop.. ), stat='count',vjust = 1.5, size = 3.0,color = "white" ) +
scale_y_continuous(labels = scales::percent) +
labs(y= " "),
df_black %>%
ggplot(df_black, mapping = aes(MALE))+
geom_bar(aes(y = (..count..)/sum(..count..))) +
geom_text(aes(label = ((..count..)/sum(..count..))*100,
y= ..prop.. ), stat='count',vjust = 3, size = 3.0) +
scale_y_continuous(labels = scales::percent) +
labs(y= " "),
df_black %>%
ggplot(df_black, mapping = aes(MARRIED))+
geom_bar(aes(y = (..count..)/sum(..count..))) +
geom_text(aes(label = ((..count..)/sum(..count..))*100,
y= ..prop.. ), stat='count',vjust = 5, size = 3.0) +
scale_y_continuous(labels = scales::percent) +
labs(y= " ")
, ncol = 2)
#Estimate Logit Model
LogitModel = glm(APPROVE ~ MARRIED + Meet_Guidelines + Obligation + BLACK + HISPAN + Loan_Purchase, data = df,
family = "binomial")
summary(LogitModel)
##
## Call:
## glm(formula = APPROVE ~ MARRIED + Meet_Guidelines + Obligation +
## BLACK + HISPAN + Loan_Purchase, family = "binomial", data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.8975 0.2423 0.3071 0.3683 2.3487
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.253682 0.686366 1.827 0.067768 .
## MARRIED1 0.477739 0.184576 2.588 0.009645 **
## Meet_Guidelines 3.776275 0.220505 17.126 < 2e-16 ***
## Obligation -0.033849 0.010561 -3.205 0.001351 **
## BLACK -0.861500 0.242228 -3.557 0.000376 ***
## HISPAN -0.853023 0.322550 -2.645 0.008178 **
## Loan_Purchase -0.016167 0.007016 -2.304 0.021198 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1435.50 on 1951 degrees of freedom
## Residual deviance: 927.23 on 1945 degrees of freedom
## AIC: 941.23
##
## Number of Fisher Scoring iterations: 6
exp(coef(LogitModel))
## (Intercept) MARRIED1 Meet_Guidelines Obligation
## 3.5032185 1.6124238 43.6531243 0.9667177
## BLACK HISPAN Loan_Purchase
## 0.4225276 0.4261247 0.9839626
#Generate Log-Likelihood
logLik(LogitModel)
## 'log Lik.' -463.6162 (df=7)
#Define prototypical loan applicants (you will need more than 3)
prototype1 <- data.frame(Obligation=median(df$Obligation), MARRIED = "1", Loan_Purchase = median(df$Loan_Purchase),
Meet_Guidelines = 1, BLACK = 1, HISPAN = 0)
prototype2 <- data.frame(Obligation=median(df$Obligation), MARRIED = "1", Loan_Purchase = median(df$Loan_Purchase),
Meet_Guidelines = 1, BLACK = 0, HISPAN = 1)
prototype3 <- data.frame(Obligation=median(df$Obligation), MARRIED = "1", Loan_Purchase = median(df$Loan_Purchase),
Meet_Guidelines = 1, BLACK = 0, HISPAN = 0)
prototype4 <- data.frame(Obligation=median(df$Obligation), MARRIED = "1", Loan_Purchase = median(df$Loan_Purchase),
Meet_Guidelines = 0, BLACK = 1, HISPAN = 0)
prototype5 <- data.frame(Obligation=median(df$Obligation), MARRIED = "1", Loan_Purchase = median(df$Loan_Purchase),
Meet_Guidelines = 0, BLACK = 0, HISPAN = 1)
prototype6 <- data.frame(Obligation=median(df$Obligation), MARRIED = "1", Loan_Purchase = median(df$Loan_Purchase),
Meet_Guidelines = 0, BLACK = 0, HISPAN = 0)
prototype7 <- data.frame(Obligation=median(df$Obligation), MARRIED = "0", Loan_Purchase = median(df$Loan_Purchase),
Meet_Guidelines = 1, BLACK = 1, HISPAN = 0)
prototype8 <- data.frame(Obligation=median(df$Obligation), MARRIED = "0", Loan_Purchase = median(df$Loan_Purchase),
Meet_Guidelines = 1, BLACK = 0, HISPAN = 1)
prototype9 <- data.frame(Obligation=median(df$Obligation), MARRIED = "0", Loan_Purchase = median(df$Loan_Purchase),
Meet_Guidelines = 1, BLACK = 0, HISPAN = 0)
prototype10 <- data.frame(Obligation=median(df$Obligation), MARRIED = "0", Loan_Purchase = median(df$Loan_Purchase),
Meet_Guidelines = 0, BLACK = 1, HISPAN = 0)
prototype11 <- data.frame(Obligation=median(df$Obligation), MARRIED = "0", Loan_Purchase = median(df$Loan_Purchase),
Meet_Guidelines = 0, BLACK = 0, HISPAN = 1)
prototype12 <- data.frame(Obligation=median(df$Obligation), MARRIED = "0", Loan_Purchase = median(df$Loan_Purchase),
Meet_Guidelines = 0, BLACK = 0, HISPAN = 0)
#Predict probabilities for prototypical individuals
prototype1$predictedprob <- predict (LogitModel, newdata = prototype1, type ="response")
prototype2$predictedprob <- predict (LogitModel, newdata = prototype2, type ="response")
prototype3$predictedprob <- predict (LogitModel, newdata = prototype3, type ="response")
prototype4$predictedprob <- predict (LogitModel, newdata = prototype4, type ="response")
prototype5$predictedprob <- predict (LogitModel, newdata = prototype5, type ="response")
prototype6$predictedprob <- predict (LogitModel, newdata = prototype6, type ="response")
prototype7$predictedprob <- predict (LogitModel, newdata = prototype7, type ="response")
prototype8$predictedprob <- predict (LogitModel, newdata = prototype8, type ="response")
prototype9$predictedprob <- predict (LogitModel, newdata = prototype9, type ="response")
prototype10$predictedprob <- predict (LogitModel, newdata = prototype10, type ="response")
prototype11$predictedprob <- predict (LogitModel, newdata = prototype11, type ="response")
prototype12$predictedprob <- predict (LogitModel, newdata = prototype12, type ="response")
rbind.data.frame(prototype1,prototype2,prototype3,prototype4,prototype5,prototype6,prototype7,prototype8,prototype9,prototype10,prototype11,prototype12)
## Obligation MARRIED Loan_Purchase Meet_Guidelines BLACK HISPAN
## 1 33 1 80 1 1 0
## 2 33 1 80 1 0 1
## 3 33 1 80 1 0 0
## 4 33 1 80 0 1 0
## 5 33 1 80 0 0 1
## 6 33 1 80 0 0 0
## 7 33 0 80 1 1 0
## 8 33 0 80 1 0 1
## 9 33 0 80 1 0 0
## 10 33 0 80 0 1 0
## 11 33 0 80 0 0 1
## 12 33 0 80 0 0 0
## predictedprob
## 1 0.9034182
## 2 0.9041553
## 3 0.9567810
## 4 0.1764655
## 5 0.1777009
## 6 0.3364891
## 7 0.8529661
## 8 0.8540261
## 9 0.9321097
## 10 0.1173033
## 11 0.1181839
## 12 0.2392642
#Estimate Probit Model
ProbitModel = glm(APPROVE ~ MARRIED + Meet_Guidelines + Obligation + BLACK + HISPAN + Loan_Purchase, data = df,
family = "binomial" (link = "probit"))
summary(ProbitModel)
##
## Call:
## glm(formula = APPROVE ~ MARRIED + Meet_Guidelines + Obligation +
## BLACK + HISPAN + Loan_Purchase, family = binomial(link = "probit"),
## data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.9489 0.2372 0.3058 0.3710 2.2906
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.442016 0.337821 1.308 0.190727
## MARRIED1 0.236340 0.091676 2.578 0.009937 **
## Meet_Guidelines 2.174365 0.122768 17.711 < 2e-16 ***
## Obligation -0.016137 0.005449 -2.962 0.003060 **
## BLACK -0.445506 0.127706 -3.489 0.000486 ***
## HISPAN -0.439135 0.167857 -2.616 0.008893 **
## Loan_Purchase -0.007493 0.003283 -2.282 0.022471 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1435.50 on 1951 degrees of freedom
## Residual deviance: 927.16 on 1945 degrees of freedom
## AIC: 941.16
##
## Number of Fisher Scoring iterations: 6
exp(coef(ProbitModel))
## (Intercept) MARRIED1 Meet_Guidelines Obligation
## 1.5558400 1.2666049 8.7965935 0.9839925
## BLACK HISPAN Loan_Purchase
## 0.6405003 0.6445936 0.9925346
#Generate Log-Likelihood
logLik(ProbitModel)
## 'log Lik.' -463.58 (df=7)
#Predict probabilities for prototypical individuals
prototype1$predictedprob <- predict (ProbitModel, newdata = prototype1, type ="response")
prototype2$predictedprob <- predict (ProbitModel, newdata = prototype2, type ="response")
prototype3$predictedprob <- predict (ProbitModel, newdata = prototype3, type ="response")
prototype4$predictedprob <- predict (ProbitModel, newdata = prototype4, type ="response")
prototype5$predictedprob <- predict (ProbitModel, newdata = prototype5, type ="response")
prototype6$predictedprob <- predict (ProbitModel, newdata = prototype6, type ="response")
prototype7$predictedprob <- predict (ProbitModel, newdata = prototype7, type ="response")
prototype8$predictedprob <- predict (ProbitModel, newdata = prototype8, type ="response")
prototype9$predictedprob <- predict (ProbitModel, newdata = prototype9, type ="response")
prototype10$predictedprob <- predict (ProbitModel, newdata = prototype10, type ="response")
prototype11$predictedprob <- predict (ProbitModel, newdata = prototype11, type ="response")
prototype12$predictedprob <- predict (ProbitModel, newdata = prototype12, type ="response")
rbind.data.frame(prototype1,prototype2,prototype3,prototype4,prototype5,prototype6,prototype7,prototype8,prototype9,prototype10,prototype11,prototype12)
## Obligation MARRIED Loan_Purchase Meet_Guidelines BLACK HISPAN
## 1 33 1 80 1 1 0
## 2 33 1 80 1 0 1
## 3 33 1 80 1 0 0
## 4 33 1 80 0 1 0
## 5 33 1 80 0 0 1
## 6 33 1 80 0 0 0
## 7 33 0 80 1 1 0
## 8 33 0 80 1 0 1
## 9 33 0 80 1 0 0
## 10 33 0 80 0 1 0
## 11 33 0 80 0 0 1
## 12 33 0 80 0 0 0
## predictedprob
## 1 0.8988841
## 2 0.9000065
## 3 0.9573496
## 4 0.1842875
## 5 0.1859887
## 6 0.3250439
## 7 0.8505694
## 8 0.8520460
## 9 0.9311465
## 10 0.1280859
## 11 0.1294245
## 12 0.2451033