library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
data<-read.csv("https://raw.githubusercontent.com/scottogden10/621_hw/master/classification-output-data.csv")
head(data)
## pregnant glucose diastolic skinfold insulin bmi pedigree age class
## 1 7 124 70 33 215 25.5 0.161 37 0
## 2 2 122 76 27 200 35.9 0.483 26 0
## 3 3 107 62 13 48 22.9 0.678 23 1
## 4 1 91 64 24 0 29.2 0.192 21 0
## 5 4 83 86 19 0 29.3 0.317 34 0
## 6 1 100 74 12 46 19.5 0.149 28 0
## scored.class scored.probability
## 1 0 0.32845226
## 2 0 0.27319044
## 3 0 0.10966039
## 4 0 0.05599835
## 5 0 0.10049072
## 6 0 0.05515460
tab<-table(data$class,data$scored.class)
rownames(tab)<-c("Real Negative","Real Positive")   # rows: actual class (data$class)
colnames(tab)<-c("Model Negative","Model Positive") # columns: model's scored class
tab
##
##                 Model Negative Model Positive
##   Real Negative            119              5
##   Real Positive             30             27
Confus.Matrix <- function(data) {
tab <- table(data$class,data$scored.class)
rownames(tab)<-c("Real Negative","Real Positive")
colnames(tab)<-c("Model Negative","Model Positive")
return(tab)
}
Confus.Matrix(data=data)
##
##                 Model Negative Model Positive
##   Real Negative            119              5
##   Real Positive             30             27
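A quick reminder on orientation before building the metric functions: table()'s first argument forms the rows, so data$class (the actual class) lands on the rows and data$scored.class (the model's call) on the columns. A tiny made-up example:
# table(): first argument = rows, second argument = columns
actual    <- c(0, 0, 1, 1)
predicted <- c(0, 1, 1, 1)
table(actual, predicted)   # rows are 'actual', columns are 'predicted'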
Sens<-function(data) {
  tab <- table(data$class,data$scored.class)
  # Class 0 is treated as the positive class throughout, matching caret's default used below
  tp<-tab[1,1]
  fn<-tab[1,2]
  sens<-tp/(tp+fn)
  return(sens)
}
Spec<-function(data) {
  tab <- table(data$class,data$scored.class)
  tn<-tab[2,2]
  fp<-tab[2,1]
  spec<-tn/(tn+fp)
  return(spec)
}
CER<-function(data) {
  tab <- table(data$class,data$scored.class)
  tp<-tab[1,1]
  tn<-tab[2,2]
  fn<-tab[1,2]
  fp<-tab[2,1]
  cer<-(fp+fn)/(tp+tn+fn+fp)
  return(cer)
}
PREC<-function(data) {
  tab <- table(data$class,data$scored.class)
  tp<-tab[1,1]
  fp<-tab[2,1]
  prec<-tp/(tp+fp)
  return(prec)
}
ACC<-function(data) {
  tab <- table(data$class,data$scored.class)
  tp<-tab[1,1]
  tn<-tab[2,2]
  fn<-tab[1,2]
  fp<-tab[2,1]
  acc<-(tp+tn)/(tp+tn+fn+fp)
  return(acc)
}
F1<-function(data) {
  tab <- table(data$class,data$scored.class)
  tp<-tab[1,1]
  fn<-tab[1,2]
  fp<-tab[2,1]
  prec<-tp/(tp+fp)
  sens<-tp/(tp+fn)
  f1<-2*prec*sens/(prec+sens)
  return(f1)
}
Metrics<-function(data) {
  tab <- table(data$class,data$scored.class)
  tp<-tab[1,1]
  tn<-tab[2,2]
  fn<-tab[1,2]
  fp<-tab[2,1]
  acc<-(tp+tn)/(tp+tn+fn+fp)
  cer<-(fp+fn)/(tp+tn+fn+fp)
  prec<-tp/(tp+fp)
  sens<-tp/(tp+fn)
  spec<-tn/(tn+fp)
  f1<-2*prec*sens/(prec+sens)
  results<-data.frame(list(acc,cer,prec,sens,spec,f1))
  colnames(results) <- c("Accuracy","Classification Error Rate","Precision","Sensitivity","Specificity","F1 Score")
  results<-data.frame(t(results))
  colnames(results)<-"Result"
  return(results)
}
Metrics(data)
## Result
## Accuracy 0.8066298
## Classification Error Rate 0.1933702
## Precision 0.7986577
## Sensitivity 0.9596774
## Specificity 0.4736842
## F1 Score 0.8717949
The F1 Score is minimized at zero when either precision or sensitivity is zero. Neither precision nor sensitivity can be negative, since tp, tn, fn, and fp are all counts.
F1 is maximized at 1, attained when precision = sensitivity = 1.
Proof by contradiction that F1 never exceeds 1. Write p for precision and s for sensitivity, with 0 < p <= 1 and 0 < s <= 1, and suppose 2ps/(p+s) > 1.
Re-arrange: 2ps > p + s
Divide both sides by sqrt(ps): 2*sqrt(ps) > sqrt(p/s) + sqrt(s/p)
Since p <= 1 and s <= 1, the left-hand side is at most 2. Let x = sqrt(s/p), so that
2 > x + 1/x
This is a contradiction, since f(x) = x + 1/x never attains a value lower than its global minimum of f(1) = 2 for positive x. See:
f'(x) = 1 - 1/x^2 = 0
=> x = +/-1 (but sqrt(s/p) is always positive), and f(1) = 2
Second derivative test:
f''(x) = 2/x^3, so f''(1) = 2 > 0, which implies a minimum.
So f(x) >= 2 for all positive x, which contradicts 2 > x + 1/x. The assumption fails, so F1 always lies between 0 and 1. QED.
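As a quick numerical sanity check (not part of the original problem), the F1 formula can be evaluated over a grid of hypothetical precision and sensitivity values in (0, 1] to confirm it stays between 0 and 1:
# Informal check that F1 = 2ps/(p+s) stays within (0, 1] for p, s in (0, 1]
grid <- expand.grid(p = seq(0.01, 1, by = 0.01), s = seq(0.01, 1, by = 0.01))
f1_grid <- with(grid, 2*p*s/(p + s))
range(f1_grid)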
ROC_Scott <- function(data,t) {
  n <- round(1/t)
  se <- numeric(n)
  sp <- numeric(n)
  for (i in 1:n)
  {
    # Classify at threshold t*i; fix both factor levels so caret can score every threshold
    pred <- factor(as.numeric(data$scored.probability > t*i), levels = c(0,1))
    se[i] <- sensitivity(reference=as.factor(data$class), data=pred)
    sp[i] <- specificity(reference=as.factor(data$class), data=pred)
  }
  ## Area of rectangles under the ROC curve (sensitivity vs. 1-specificity)
  b1<-se[-1]
  x1<-sp[-1]
  x2<-sp[-n]
  auc<-sum(b1*(x2-x1))
  plot(y=se,x=1-sp,xlab="1-Specificity",ylab="Sensitivity",main = "Scott ROC",type="l")
  return(paste0("AUC=",round(auc,3)))
}
#ROC_Scott(data=data,t=.01)
Metrics(data)
## Result
## Accuracy 0.8066298
## Classification Error Rate 0.1933702
## Precision 0.7986577
## Sensitivity 0.9596774
## Specificity 0.4736842
## F1 Score 0.8717949
ROC_Scott(data,t=0.01)
## [1] "AUC=0.854"
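As a hedged cross-check on the rectangle approximation (the name ROC_trap is just for this illustration, not part of the assignment), the same curve can be integrated with the trapezoid rule using base R only; the result should sit close to the pROC value reported further down.
# Trapezoidal AUC over the same threshold grid, class 0 treated as the positive class
ROC_trap <- function(data, t = 0.01) {
  thr <- seq(t, 1, by = t)
  se <- sapply(thr, function(th) mean(data$scored.probability[data$class == 0] <= th))
  sp <- sapply(thr, function(th) mean(data$scored.probability[data$class == 1] > th))
  x <- 1 - sp
  sum(diff(x) * (head(se, -1) + tail(se, -1))/2)   # trapezoid rule on se vs. 1-sp
}
# ROC_trap(data)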
Note that the caret confusionMatrix() results below match the values computed above: the confusion matrix (with predictions on the rows), accuracy, sensitivity, specificity, and positive predictive value (our precision) are all the same, because caret also treats class 0 as the positive class by default.
confusionMatrix(data=as.factor(data$scored.class), reference=as.factor(data$class))
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 119 30
## 1 5 27
##
## Accuracy : 0.8066
## 95% CI : (0.7415, 0.8615)
## No Information Rate : 0.6851
## P-Value [Acc > NIR] : 0.0001712
##
## Kappa : 0.4916
## Mcnemar's Test P-Value : 4.976e-05
##
## Sensitivity : 0.9597
## Specificity : 0.4737
## Pos Pred Value : 0.7987
## Neg Pred Value : 0.8438
## Prevalence : 0.6851
## Detection Rate : 0.6575
## Detection Prevalence : 0.8232
## Balanced Accuracy : 0.7167
##
## 'Positive' Class : 0
##
Metrics(data)
## Result
## Accuracy 0.8066298
## Classification Error Rate 0.1933702
## Precision 0.7986577
## Sensitivity 0.9596774
## Specificity 0.4736842
## F1 Score 0.8717949
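If class 1 were instead scored as the positive class, caret's confusionMatrix() takes a positive argument; this is shown only as an option and was not used for the comparison above.
# Treat class 1 as the positive class instead of caret's default (the first factor level, "0")
confusionMatrix(data = as.factor(data$scored.class),
                reference = as.factor(data$class),
                positive = "1")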
roc(data$class, data$scored.probability,plot=TRUE)
##
## Call:
## roc.default(response = data$class, predictor = data$scored.probability, plot = TRUE)
##
## Data: data$scored.probability in 124 controls (data$class 0) < 57 cases (data$class 1).
## Area under the curve: 0.8503
The AUC from pROC is 0.8503, close to the 0.854 rectangle estimate from ROC_Scott above. This compares favorably with the results (0.80) from Problem 11.
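As one final hedged cross-check (rank_auc is just an illustrative helper, not from the assignment), the AUC can also be recovered directly from the Mann-Whitney rank statistic; with average ranks for ties this should agree with the pROC value above.
# AUC via the rank (Mann-Whitney) formula, class 1 as the event of interest
rank_auc <- function(labels, scores) {
  r <- rank(scores)                      # average ranks for ties
  n_pos <- sum(labels == 1)
  n_neg <- sum(labels == 0)
  (sum(r[labels == 1]) - n_pos*(n_pos + 1)/2) / (n_pos*n_neg)
}
# rank_auc(data$class, data$scored.probability)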