Libraries & Data
require(PRROC)
## Loading required package: PRROC
## Warning: package 'PRROC' was built under R version 4.3.2
require(caret)
## Loading required package: caret
## Warning: package 'caret' was built under R version 4.3.2
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.3.2
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 4.3.2
# Load the week 8 data set. String columns are read as factors; the DEFAULT
# recoding step that follows relies on DEFAULT arriving as a factor.
mydata <- read.csv('c:/users/lfult/documents/_courses/w/week 8.csv',
                   stringsAsFactors = TRUE)
Convert Factor to 0/1
# as.numeric() on a factor returns its integer level codes (1, 2, ...), so
# subtracting 1 maps the first level to 0 and the second level to 1.
# NOTE(review): assumes DEFAULT was read as a two-level factor -- confirm.
mydata$DEFAULT <- as.numeric(mydata[["DEFAULT"]]) - 1
Set Pseudo-Random Seed
# Fix the random number generator state so the random draw used for the
# train/test split is reproducible from run to run.
set.seed(seed = 1234)
Train Test Split
# Draw the row indices of the training set: 70% of the rows, sampled without
# replacement. seq_len() replaces the redundant seq(1:n), and floor() makes
# the whole-number sample size explicit instead of relying on coercion.
mys <- sample(seq_len(nrow(mydata)), floor(0.7 * nrow(mydata)))
train <- mydata[mys, ]
# Every row not drawn for training goes to the test set. setdiff() is used
# instead of negative indexing because mydata[-mys, ] selects zero rows
# (rather than all rows) when mys happens to be empty.
test <- mydata[setdiff(seq_len(nrow(mydata)), mys), ]
Run Logistic Regression
# Fit a logistic regression (binomial family) of DEFAULT on all remaining
# columns of the training set.
myglm <- glm(
  formula = DEFAULT ~ .,
  family = 'binomial',
  data = train
)
Print results
# Print the coefficient table, deviances and AIC of the fitted model.
summary(object = myglm)
##
## Call:
## glm(formula = DEFAULT ~ ., family = "binomial", data = train)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.827303 1.214251 -3.976 7.02e-05 ***
## BUSAGE 0.001535 0.005244 0.293 0.77
## DAYSDELQ 0.097326 0.022405 4.344 1.40e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 79.807 on 69 degrees of freedom
## Residual deviance: 40.384 on 67 degrees of freedom
## AIC: 46.384
##
## Number of Fisher Scoring iterations: 6
Predict
# Score the held-out rows; type = 'response' asks for predictions on the
# scale of the response (probabilities) rather than on the link scale.
mypred <- predict(object = myglm, newdata = test, type = 'response')
# Round each probability to the nearest whole number to obtain a 0/1 label.
mypred2 <- round(mypred, digits = 0)
Print Table
# Cross-tabulate predicted labels (rows) against the observed DEFAULT values
# (columns) of the test set.
table(mypred2, test[["DEFAULT"]])
##
## mypred2 0 1
## 0 21 2
## 1 2 5
Confusion Matrix
# Build both factors with the same explicit 0/1 level set. as.factor() would
# derive the levels from the values actually present, so a test set in which
# only one class is predicted would yield mismatched levels between the
# predictions and the reference.
confusionMatrix(
  data = factor(mypred2, levels = c(0, 1)),
  reference = factor(test$DEFAULT, levels = c(0, 1)),
  positive = '1'
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 21 2
## 1 2 5
##
## Accuracy : 0.8667
## 95% CI : (0.6928, 0.9624)
## No Information Rate : 0.7667
## P-Value [Acc > NIR] : 0.1381
##
## Kappa : 0.6273
##
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 0.7143
## Specificity : 0.9130
## Pos Pred Value : 0.7143
## Neg Pred Value : 0.9130
## Prevalence : 0.2333
## Detection Rate : 0.1667
## Detection Prevalence : 0.2333
## Balanced Accuracy : 0.8137
##
## 'Positive' Class : 1
##
Precision-Recall Curve
# Split the predicted probabilities by the observed outcome.
pos_scores <- mypred[test$DEFAULT == 1]
neg_scores <- mypred[test$DEFAULT == 0]
# PRROC defines class 0 as the positive (foreground) class and class 1 as the
# negative (background) class, so the scores of the observed defaults go in
# scores.class0 and the scores of the non-defaults in scores.class1. Passing
# them the other way round draws the curve for the non-default class instead.
plot(pr.curve(scores.class0 = pos_scores, scores.class1 = neg_scores,
              curve = TRUE))
