Load Dataset

# Read the scored classification data set (comma-separated, with a header
# row) from GitHub into the data frame `raw`.
raw <- read.csv(
  "https://raw.githubusercontent.com/mascotinme/MSDA-IS621/master/classification-output-data%20(1).csv",
  header = TRUE,
  sep = ","
)

Question 2

# Confusion matrix for the raw data: model predictions (`scored.class`) on the
# rows, observed labels (`class`) on the columns. The factor levels put the
# positive class (1) first so that each label matches the cell it is attached
# to; values other than 0/1 become NA and are left out of the table.
t <- table(
  factor(raw$scored.class, levels = c(1, 0)),
  factor(raw$class, levels = c(1, 0))
)
rownames(t) <- c('Model Positive', 'Model Negative')
colnames(t) <- c('Real Positive', 'Real Negative')
t
##                 
##                  Real Positive Real Negative
##   Model Positive            27             5
##   Model Negative            30           119

Question 3

Below are the functions written for Questions 3 through 8.

# Build the 2x2 confusion matrix for a scored data set.
#
# Arguments:
#   df  Data frame with 0/1 columns `class` (observed label) and
#       `scored.class` (model prediction). 1 is treated as the positive
#       class; any other value becomes NA and is left out of the counts.
#
# Returns a 2x2 table whose rows are the predictions ("Model Positive",
# "Model Negative") and whose columns are the observed labels
# ("Real Positive", "Real Negative").
getConfusionMatrix <- function(df) {
  # Fixed factor levels put the positive class first and keep the table 2x2
  # even when one of the classes never occurs in `df`.
  predicted <- factor(df$scored.class, levels = c(1, 0))
  observed <- factor(df$class, levels = c(1, 0))

  # Predictions on the rows, observed labels on the columns, so that the
  # labels assigned below describe the cells they are attached to.
  # deparse.level = 0 keeps the dimension names empty.
  cm <- table(predicted, observed, deparse.level = 0)
  rownames(cm) <- c('Model Positive', 'Model Negative')
  colnames(cm) <- c('Real Positive', 'Real Negative')

  cm
}

Accuracy

# Accuracy: (TN + TP) divided by the total of all four confusion-matrix
# cells, read by label from getConfusionMatrix(df).
ReturnAccuracy <- function(df) {
  counts <- getConfusionMatrix(df)

  true_pos  <- counts["Model Positive", "Real Positive"]
  true_neg  <- counts["Model Negative", "Real Negative"]
  false_pos <- counts["Model Positive", "Real Negative"]
  false_neg <- counts["Model Negative", "Real Positive"]

  correct <- true_neg + true_pos
  total <- true_neg + false_pos + false_neg + true_pos

  correct / total
}

Classification Error Rate (CER)

# Classification error rate: (FP + FN) divided by the total of all four
# confusion-matrix cells, read by label from getConfusionMatrix(df).
ReturnCER <- function(df) {
  counts <- getConfusionMatrix(df)

  true_pos  <- counts["Model Positive", "Real Positive"]
  true_neg  <- counts["Model Negative", "Real Negative"]
  false_pos <- counts["Model Positive", "Real Negative"]
  false_neg <- counts["Model Negative", "Real Positive"]

  misclassified <- false_pos + false_neg
  total <- true_neg + false_pos + false_neg + true_pos

  misclassified / total
}

Precision

# Precision: TP / (FP + TP), using the "Model Positive" row of the
# confusion matrix returned by getConfusionMatrix(df).
ReturnPrecision <- function(df) {
  counts <- getConfusionMatrix(df)

  true_pos  <- counts["Model Positive", "Real Positive"]
  false_pos <- counts["Model Positive", "Real Negative"]

  true_pos / (false_pos + true_pos)
}

Sensitivity

# Sensitivity: TP / (TP + FN), using the "Real Positive" column of the
# confusion matrix returned by getConfusionMatrix(df).
ReturnSensitivity <- function(df) {
  counts <- getConfusionMatrix(df)

  true_pos  <- counts["Model Positive", "Real Positive"]
  false_neg <- counts["Model Negative", "Real Positive"]

  true_pos / (true_pos + false_neg)
}

Specificity

# Specificity: TN / (TN + FP), using the "Real Negative" column of the
# confusion matrix returned by getConfusionMatrix(df).
ReturnSpecificity <- function(df) {
  counts <- getConfusionMatrix(df)

  true_neg  <- counts["Model Negative", "Real Negative"]
  false_pos <- counts["Model Positive", "Real Negative"]

  true_neg / (true_neg + false_pos)
}

F1 Score

# F1 score: 2 * precision * sensitivity / (precision + sensitivity).
#
# Arguments:
#   df  Data frame accepted by ReturnPrecision() and ReturnSensitivity().
#
# Returns the F1 score as a single number.
ReturnF1Score <- function(df) {
  # Use the precision/sensitivity helpers defined in this file.
  precision <- ReturnPrecision(df)
  sensitivity <- ReturnSensitivity(df)

  2 * (precision * sensitivity) / (precision + sensitivity)
}



#predictions <- log_reg(raw1, size=10)

# Inspect the structure of the loaded data.
str(raw)
## 'data.frame':    181 obs. of  11 variables:
##  $ pregnant          : int  7 2 3 1 4 1 9 8 1 2 ...
##  $ glucose           : int  124 122 107 91 83 100 89 120 79 123 ...
##  $ diastolic         : int  70 76 62 64 86 74 62 78 60 48 ...
##  $ skinfold          : int  33 27 13 24 19 12 0 0 42 32 ...
##  $ insulin           : int  215 200 48 0 0 46 0 0 48 165 ...
##  $ bmi               : num  25.5 35.9 22.9 29.2 29.3 19.5 22.5 25 43.5 42.1 ...
##  $ pedigree          : num  0.161 0.483 0.678 0.192 0.317 0.149 0.142 0.409 0.678 0.52 ...
##  $ age               : int  37 26 23 21 34 28 33 64 23 26 ...
##  $ class             : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ scored.class      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ scored.probability: num  0.328 0.273 0.11 0.056 0.1 ...

# NOTE(review): attach() puts the columns of `raw` on the search path, so
# bare names such as `class` and `scored.probability` resolve to them; code
# further down may rely on this.
attach(raw)
ab <- rep(NA, length(scored.probability))

# Logistic regression of the observed class on the supplied scored
# probability; `S` holds the fitted probabilities for every row of `raw`.
reg <- glm(class ~ scored.probability, family = binomial, data = raw)

S <- predict(reg, type = "response")

# Compute one point of the ROC curve at cutoff `s`.
#
# Arguments:
#   s       Numeric cutoff; observations whose score is strictly greater
#           than `s` are predicted as class 1.
#   print   If TRUE, also print the Observed x Predicted contingency table.
#   scores  Numeric vector of predicted scores. Defaults to the global `S`.
#   actual  Vector of observed 0/1 labels, same length as `scores`.
#           Defaults to `raw$class`.
#
# Returns a named numeric vector c(FPR = ..., TPR = ...): the share of
# observed 0s predicted as 1 and the share of observed 1s predicted as 1.
roc.curve <- function(s, print = FALSE, scores = S, actual = raw$class) {
  predicted <- as.integer(scores > s)
  is_negative <- actual == 0
  is_positive <- actual == 1

  fpr <- sum(predicted == 1 & is_negative) / sum(is_negative)
  tpr <- sum(predicted == 1 & is_positive) / sum(is_positive)

  if (isTRUE(print)) {
    print(table(Observed = actual, Predicted = predicted))
  }

  c(FPR = fpr, TPR = tpr)
}

# Evaluate the classifier at a single cutoff and print its contingency table.
threshold <- 0.4
roc.curve(threshold, print = TRUE)
##         Predicted
## Observed   0   1
##        0 109  15
##        1  19  38
##       FPR       TPR 
## 0.1209677 0.6666667

# Vectorise over the cutoff only, so a whole grid of cutoffs can be
# evaluated in one call.
ROC.curve <- Vectorize(roc.curve, vectorize.args = "s")

# TRUE where an observation is misclassified at `threshold`: predicted 1 but
# observed 0, or predicted 0 but observed 1.
I <- ((S > threshold) & (raw$class == 0)) | ((S <= threshold) & (raw$class == 1))

# Fitted probability against observed class; correctly classified points are
# red, misclassified points are blue. The vertical line marks the cutoff.
plot(S, raw$class, col = c("red", "blue")[I + 1], pch = 19, cex = 0.7,
     xlab = "Predicted probability", ylab = "Observed class")
abline(v = threshold, col = "gray")

# ROC curve: row 1 of M.ROC is the FPR and row 2 the TPR for each cutoff.
M.ROC <- ROC.curve(seq(0, 1, by = 0.01))
plot(M.ROC[1, ], M.ROC[2, ], col = "grey", lwd = 2, type = "l",
     xlab = "1 - Specificity (FPR)", ylab = "Sensitivity (TPR)")