Libraries & Data

require(PRROC)
## Loading required package: PRROC
## Warning: package 'PRROC' was built under R version 4.3.2
require(caret)
## Loading required package: caret
## Warning: package 'caret' was built under R version 4.3.2
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.3.2
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 4.3.2
# Load the week 8 data set; string columns are read in as factors.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
mydata <- read.csv("c:/users/lfult/documents/_courses/w/week 8.csv", stringsAsFactors = TRUE)

Convert Factor to 0/1

# Recode DEFAULT from its underlying level codes (1, 2) to 0/1 by subtracting one.
# NOTE(review): assumes DEFAULT was read in as a two-level factor — confirm.
mydata$DEFAULT <- as.numeric(mydata$DEFAULT) - 1

Set Pseudo-Random Seed

# Fix the RNG seed so the random draw used for the train/test split is reproducible.
set.seed(1234)

Train Test Split

# Draw a random 70% of the row indices for training; the remaining rows form
# the test set. seq_len() replaces the redundant seq(1:n) and is safe for
# zero-row data; floor() makes the truncation of a fractional size explicit.
mys <- sample(seq_len(nrow(mydata)), floor(0.7 * nrow(mydata)))
train <- mydata[mys, ]
# setdiff() rather than -mys: a negative subscript built from an empty index
# vector selects no rows at all instead of every row.
test <- mydata[setdiff(seq_len(nrow(mydata)), mys), ]

Run Logistic Regression

# Fit a logistic regression of DEFAULT on all remaining columns of the training set.
myglm <- glm(DEFAULT ~ ., data = train, family = "binomial")

Predict

# Score the held-out rows; type = "response" asks for predictions on the
# response (probability) scale.
mypred <- predict(myglm, newdata = test, type = "response")
# Round each predicted probability to the nearest whole number to get 0/1 labels.
mypred2 <- round(mypred, digits = 0)

Confusion Matrix

# Cross-tabulate predicted vs. observed classes, treating "1" as the positive class.
# Both factors are given the full 0/1 level set explicitly: as.factor() would
# drop a level when every prediction lands in one class, leaving the two
# factors with mismatched levels.
confusionMatrix(
  data = factor(mypred2, levels = c(0, 1)),
  reference = factor(test$DEFAULT, levels = c(0, 1)),
  positive = "1"
)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 21  2
##          1  2  5
##                                           
##                Accuracy : 0.8667          
##                  95% CI : (0.6928, 0.9624)
##     No Information Rate : 0.7667          
##     P-Value [Acc > NIR] : 0.1381          
##                                           
##                   Kappa : 0.6273          
##                                           
##  Mcnemar's Test P-Value : 1.0000          
##                                           
##             Sensitivity : 0.7143          
##             Specificity : 0.9130          
##          Pos Pred Value : 0.7143          
##          Neg Pred Value : 0.9130          
##              Prevalence : 0.2333          
##          Detection Rate : 0.1667          
##    Detection Prevalence : 0.2333          
##       Balanced Accuracy : 0.8137          
##                                           
##        'Positive' Class : 1               
## 

Precision Recall Curve

# Split the predicted probabilities by the observed outcome.
pos_scores <- mypred[test$DEFAULT == 1]
neg_scores <- mypred[test$DEFAULT == 0]
# PRROC's pr.curve() expects the positive-class scores in scores.class0 and the
# negative-class scores in scores.class1; passing them the other way round
# plots the curve for the wrong class.
plot(pr.curve(scores.class0 = pos_scores, scores.class1 = neg_scores, curve = TRUE))