Libraries & Data

require(PRROC)

## Loading required package: PRROC

## Warning: package 'PRROC' was built under R version 4.3.2

require(caret)

## Loading required package: caret

## Warning: package 'caret' was built under R version 4.3.2

## Loading required package: ggplot2

## Warning: package 'ggplot2' was built under R version 4.3.2

## Loading required package: lattice

## Warning: package 'lattice' was built under R version 4.3.2

# Read the course data set. stringsAsFactors = TRUE makes read.csv() convert
# character columns to factors (TRUE is spelled out because T can be reassigned).
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
mydata <- read.csv(
  "c:/users/lfult/documents/_courses/w/week 8.csv",
  stringsAsFactors = TRUE
)

Convert Factor to 0/1

# as.numeric() on a factor returns its integer level codes (1, 2, ...), so
# subtracting 1 turns a two-level factor into 0/1.
# NOTE(review): assumes DEFAULT was read in as a two-level factor -- confirm.
mydata$DEFAULT <- as.numeric(mydata$DEFAULT) - 1

Set Pseudo-Random Seed

# Fix the random number generator state so the random split below is repeatable.
set.seed(seed = 1234)

Train Test Split

# Randomly pick 70% of the row indices for training; every remaining row goes
# to the test set. seq_len() replaces the redundant seq(1:n), which yields a
# wrong index vector for zero-row data, and floor() makes explicit the
# truncation sample() applies to a fractional size.
mys <- sample(seq_len(nrow(mydata)), floor(0.7 * nrow(mydata)))
train <- mydata[mys, ]
test <- mydata[-mys, ]

Run Logistic Regression

# Fit a logistic regression of DEFAULT on every other column of the training
# set (the `.` on the right-hand side of the formula).
myglm <- glm(
  DEFAULT ~ .,
  family = "binomial",
  data = train
)

Print results

# Show the fitted model: coefficient estimates with standard errors, z values
# and p-values, followed by null/residual deviance and AIC.
summary(myglm)

## 
## Call:
## glm(formula = DEFAULT ~ ., family = "binomial", data = train)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -4.827303   1.214251  -3.976 7.02e-05 ***
## BUSAGE       0.001535   0.005244   0.293     0.77    
## DAYSDELQ     0.097326   0.022405   4.344 1.40e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 79.807  on 69  degrees of freedom
## Residual deviance: 40.384  on 67  degrees of freedom
## AIC: 46.384
## 
## Number of Fisher Scoring iterations: 6

Predict

# Score the held-out rows; type = "response" asks predict() for values on the
# response scale (predicted probabilities for a binomial glm).
mypred <- predict(myglm, newdata = test, type = "response")
# Round to zero decimals to turn each probability into a 0/1 class label.
mypred2 <- round(mypred, digits = 0)

Print Table

# Cross-tabulate predicted labels (rows) against the actual test-set DEFAULT
# values (columns).
table(mypred2, test$DEFAULT)

##        
## mypred2  0  1
##       0 21  2
##       1  2  5

Confusion Matrix

# caret's confusion matrix and derived statistics, with class "1" treated as
# the positive class. Both inputs are converted to factors first.
confusionMatrix(
  data = as.factor(mypred2),
  reference = as.factor(test$DEFAULT),
  positive = "1"
)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 21  2
##          1  2  5
##                                           
##                Accuracy : 0.8667          
##                  95% CI : (0.6928, 0.9624)
##     No Information Rate : 0.7667          
##     P-Value [Acc > NIR] : 0.1381          
##                                           
##                   Kappa : 0.6273          
##                                           
##  Mcnemar's Test P-Value : 1.0000          
##                                           
##             Sensitivity : 0.7143          
##             Specificity : 0.9130          
##          Pos Pred Value : 0.7143          
##          Neg Pred Value : 0.9130          
##              Prevalence : 0.2333          
##          Detection Rate : 0.1667          
##    Detection Prevalence : 0.2333          
##       Balanced Accuracy : 0.8137          
##                                           
##        'Positive' Class : 1               
##

Precision Recall Curve

# Split the predicted probabilities by the true class of each test row.
pos_scores <- mypred[test$DEFAULT == 1]
neg_scores <- mypred[test$DEFAULT == 0]
# PRROC expects the positive-class scores in scores.class0 and the
# negative-class scores in scores.class1. Passing them the other way round
# plots the curve for the non-default class instead of DEFAULT == 1.
plot(pr.curve(
  scores.class0 = pos_scores,
  scores.class1 = neg_scores,
  curve = TRUE
))

Week 8

lvf

2024-01-19

Libraries & Data

Convert Factor to 0/1

Set Pseudo-Random Seed

Train Test Split

Run Logistic Regression

Print results

Predict

Print Table

Confusion Matrix

Precision Recall Curve