# Self test: logistic regression on the claimants data (claimants.csv)
# Read the claimants CSV and discard its first column in one step
# (presumably a row/case identifier -- confirm against the file).
mydata <- read.csv(
  file = "C:\\Users\\RISHI RAHUL\\Desktop\\DS\\R Codes\\5 Logistic Regression\\claimants.csv"
)[-1]

# NOTE(review): attach() puts the columns on the search path so they can be
# used by bare name (e.g. ATTORNEY). It is generally discouraged because
# globals of the same name silently mask the columns; it is kept here since
# other statements in this script may rely on it.
attach(mydata)

# Per-column summary statistics, including NA counts.
summary(mydata)
## ATTORNEY CLMSEX CLMINSUR SEATBELT
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:1.0000 1st Qu.:0.00000
## Median :0.0000 Median :1.0000 Median :1.0000 Median :0.00000
## Mean :0.4888 Mean :0.5587 Mean :0.9076 Mean :0.01703
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## NA's :12 NA's :41 NA's :48
## CLMAGE LOSS
## Min. : 0.00 Min. : 0.000
## 1st Qu.: 9.00 1st Qu.: 0.400
## Median :30.00 Median : 1.069
## Mean :28.41 Mean : 3.806
## 3rd Qu.:43.00 3rd Qu.: 3.781
## Max. :95.00 Max. :173.604
## NA's :189
# Logistic regression: ATTORNEY is the response and every other column of
# mydata is a predictor (the `.` in the formula).
model <- glm(
  formula = ATTORNEY ~ .,
  family = "binomial",
  data = mydata
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Show the fitted call, coefficients and deviance summary.
model
##
## Call: glm(formula = ATTORNEY ~ ., family = "binomial", data = mydata)
##
## Coefficients:
## (Intercept) CLMSEX CLMINSUR SEATBELT CLMAGE
## -0.199978 0.432996 0.602173 -0.781079 0.006487
## LOSS
## -0.385044
##
## Degrees of Freedom: 1095 Total (i.e. Null); 1090 Residual
## (244 observations deleted due to missingness)
## Null Deviance: 1516
## Residual Deviance: 1288 AIC: 1300
# Score every row of mydata with the fitted model. type = "response" asks for
# predicted probabilities instead of values on the link (log-odds) scale.
pred <- predict(model, newdata = mydata, type = "response")
# Kept as a one-column data frame (column name "pred"), as before.
pred <- as.data.frame(pred)

# Predictions placed side by side with the original columns for inspection.
final <- cbind(mydata, pred)

# Confusion matrix at a 0.5 probability cut-off: rows are the predicted class
# (FALSE/TRUE), columns the observed ATTORNEY value. The outcome is taken
# straight from mydata instead of relying on attach(), so a stray global named
# ATTORNEY cannot be picked up by mistake; naming the argument keeps the
# "ATTORNEY" header in the printed table. table() drops NA pairs by default,
# so rows without a prediction are not counted.
confusion <- table(pred > 0.5, ATTORNEY = mydata$ATTORNEY)
confusion
## ATTORNEY
## 0 1
## FALSE 380 125
## TRUE 198 393
# Accuracy = correctly classified cases (the diagonal of the confusion
# matrix) divided by all classified cases. The division is done once on the
# totals rather than per diagonal cell, which is what the formula is meant
# to express.
accuracy <- sum(diag(confusion)) / sum(confusion)
accuracy
## [1] 0.705292
library(ROCR)
## Warning: package 'ROCR' was built under R version 3.5.1
## Loading required package: gplots
## Warning: package 'gplots' was built under R version 3.5.1
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
# Pair the predicted probabilities with the observed outcome for ROCR. The
# labels are read from mydata explicitly instead of through attach().
rocrpred <- prediction(pred, mydata$ATTORNEY)

# True-positive rate (y axis) against false-positive rate (x axis), i.e. the
# ROC curve.
rocrperf <- performance(rocrpred, "tpr", "fpr")

# Colour the curve by cut-off value. TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned.
plot(rocrperf, colorize = TRUE, text.adj = c(-0.2, 1.7))
