Load Dataset
raw <- read.csv("https://raw.githubusercontent.com/mascotinme/MSDA-IS621/master/classification-output-data%20(1).csv", sep = ",", header = TRUE)
Question 2
t <- table(raw$class, raw$scored.class)
rownames(t) <- c('Real Negative', 'Real Positive')
colnames(t) <- c('Model Negative', 'Model Positive')
t
##                 Model Negative Model Positive
## Real Negative              119              5
## Real Positive               30             27
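Reading the table above with the negative class (0) listed first: TN = 119, FP = 5, FN = 30, and TP = 27, for a total of 181 observations. These four counts are what the functions below extract from the confusion matrix.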
Question 3
The functions for Questions 3 through 8 are defined below.
getConfusionMatrix <- function(df) {
  t <- table(df$class, df$scored.class)
  rownames(t) <- c('Real Negative', 'Real Positive')
  colnames(t) <- c('Model Negative', 'Model Positive')
  return(t)
}
Accuracy
ReturnAccuracy <- function(df) {
  cm <- getConfusionMatrix(df)
  tn <- cm["Real Negative", "Model Negative"]
  fp <- cm["Real Negative", "Model Positive"]
  fn <- cm["Real Positive", "Model Negative"]
  tp <- cm["Real Positive", "Model Positive"]
  accuracy <- (tn + tp) / (tn + fp + fn + tp)
  return(accuracy)
}
Classification Error Rate (CER)
ReturnCER <- function(df) {
  cm <- getConfusionMatrix(df)
  tn <- cm["Real Negative", "Model Negative"]
  fp <- cm["Real Negative", "Model Positive"]
  fn <- cm["Real Positive", "Model Negative"]
  tp <- cm["Real Positive", "Model Positive"]
  cer <- (fp + fn) / (tn + fp + fn + tp)
  return(cer)
}
Precision
ReturnPrecision <- function(df) {
  cm <- getConfusionMatrix(df)
  fp <- cm["Real Negative", "Model Positive"]
  tp <- cm["Real Positive", "Model Positive"]
  precision <- tp / (tp + fp)
  return(precision)
}
Sensitivity
ReturnSensitivity <- function(df) {
  cm <- getConfusionMatrix(df)
  fn <- cm["Real Positive", "Model Negative"]
  tp <- cm["Real Positive", "Model Positive"]
  sensitivity <- tp / (tp + fn)
  return(sensitivity)
}
Specificity
ReturnSpecificity <- function(df) {
  cm <- getConfusionMatrix(df)
  tn <- cm["Real Negative", "Model Negative"]
  fp <- cm["Real Negative", "Model Positive"]
  specificity <- tn / (tn + fp)
  return(specificity)
}
F1 Score
ReturnF1Score <- function(df) {
  precision <- ReturnPrecision(df)
  sensitivity <- ReturnSensitivity(df)
  f1score <- 2 * (precision * sensitivity) / (precision + sensitivity)
  return(f1score)
}
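As a quick sanity check, the six functions can be applied to the loaded data frame. The approximate values in the comments are worked out by hand from the Question 2 counts (TN = 119, FP = 5, FN = 30, TP = 27); they are expectations for this sketch, not reported output.
ReturnAccuracy(raw)     # (27 + 119) / 181 ≈ 0.807
ReturnCER(raw)          # (5 + 30) / 181 ≈ 0.193
ReturnPrecision(raw)    # 27 / (27 + 5) ≈ 0.844
ReturnSensitivity(raw)  # 27 / (27 + 30) ≈ 0.474
ReturnSpecificity(raw)  # 119 / (119 + 5) ≈ 0.960
ReturnF1Score(raw)      # 2 * (0.844 * 0.474) / (0.844 + 0.474) ≈ 0.607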
str(raw)
## 'data.frame': 181 obs. of 11 variables:
## $ pregnant : int 7 2 3 1 4 1 9 8 1 2 ...
## $ glucose : int 124 122 107 91 83 100 89 120 79 123 ...
## $ diastolic : int 70 76 62 64 86 74 62 78 60 48 ...
## $ skinfold : int 33 27 13 24 19 12 0 0 42 32 ...
## $ insulin : int 215 200 48 0 0 46 0 0 48 165 ...
## $ bmi : num 25.5 35.9 22.9 29.2 29.3 19.5 22.5 25 43.5 42.1 ...
## $ pedigree : num 0.161 0.483 0.678 0.192 0.317 0.149 0.142 0.409 0.678 0.52 ...
## $ age : int 37 26 23 21 34 28 33 64 23 26 ...
## $ class : int 0 0 1 0 0 0 0 0 0 0 ...
## $ scored.class : int 0 0 0 0 0 0 0 0 0 0 ...
## $ scored.probability: num 0.328 0.273 0.11 0.056 0.1 ...
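Only the class, scored.class, and scored.probability columns are used below: class holds the observed outcome, scored.class the model's 0/1 prediction, and scored.probability the predicted probability that drives the ROC curve. The remaining columns are the original predictors.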
attach(raw)
# Fit a logistic regression of the observed class on the scored probability,
# then extract the fitted probabilities used to trace the ROC curve.
reg <- glm(class ~ scored.probability, family = binomial, data = raw)
S <- predict(reg, type = "response")
# For a given threshold s, classify using the fitted probabilities S and
# return the false positive rate (FPR) and true positive rate (TPR).
roc.curve <- function(s, print = FALSE) {
  Ps <- (S > s) * 1
  FP <- sum((Ps == 1) * (class == 0)) / sum(class == 0)
  TP <- sum((Ps == 1) * (class == 1)) / sum(class == 1)
  if (print == TRUE) {
    print(table(Observed = class, Predicted = Ps))
  }
  vect <- c(FP, TP)
  names(vect) <- c("FPR", "TPR")
  return(vect)
}
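roc.curve() depends on the globals S (the fitted probabilities) and class (available through attach(raw)). It is evaluated at a single threshold of 0.4 first, then vectorised over a grid of thresholds to trace the full curve.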
threshold <- 0.4
roc.curve(threshold, print = TRUE)
##         Predicted
## Observed   0   1
##        0 109  15
##        1  19  38
## FPR TPR
## 0.1209677 0.6666667
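At a threshold of 0.4 the refit model labels 53 cases as positive, which corresponds to a false positive rate of roughly 0.12 and a true positive rate of roughly 0.67, as reported above.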
ROC.curve <- Vectorize(roc.curve)
# Flag the cases misclassified at the 0.4 threshold, then plot the fitted
# probabilities against the observed class (blue = misclassified).
I <- (((S > threshold) & (class == 0)) | ((S <= threshold) & (class == 1)))
plot(S, class, col = c("red", "blue")[I + 1], pch = 19, cex = .7, xlab = "Fitted probability", ylab = "Observed class")
abline(v = threshold, col = "gray")
# Evaluate FPR/TPR over a grid of thresholds and trace the ROC curve.
M.ROC <- ROC.curve(seq(0, 1, by = .01))
plot(M.ROC[1, ], M.ROC[2, ], col = "grey", lwd = 2, type = "l", xlab = "False positive rate (1 - Specificity)", ylab = "True positive rate (Sensitivity)")
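For readability, a 45-degree chance diagonal (TPR = FPR) can be overlaid on the ROC plot; this line is an optional addition and not part of the original output.
abline(0, 1, lty = 2, col = "darkgray")  # no-skill reference line: TPR = FPR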