ML_ROC_CURVE

library(readr)
library(nnet)
Admission_Predict <- read_csv("C:/Users/USER/Desktop/Admission_Predict.csv")

## Rows: 400 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (4): gre, sop, cgpa, admitted
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

attach(Admission_Predict)
Admission_Predict$admitted<-as.factor(Admission_Predict$admitted)
data<-Admission_Predict
str(data)

## spc_tbl_ [400 × 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ gre     : num [1:400] 337 324 316 322 314 330 321 308 302 323 ...
##  $ sop     : num [1:400] 4.5 4 3 3.5 2 4.5 3 3 2 3.5 ...
##  $ cgpa    : num [1:400] 9.65 8.87 8 8.67 8.21 9.34 8.2 7.9 8 8.6 ...
##  $ admitted: Factor w/ 2 levels "0","1": 2 2 2 2 1 2 2 1 1 1 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   gre = col_double(),
##   ..   sop = col_double(),
##   ..   cgpa = col_double(),
##   ..   admitted = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

explore the data

summary(data)

##       gre             sop           cgpa       admitted
##  Min.   :290.0   Min.   :1.0   Min.   :6.800   0:181   
##  1st Qu.:308.0   1st Qu.:2.5   1st Qu.:8.170   1:219   
##  Median :317.0   Median :3.5   Median :8.610           
##  Mean   :316.8   Mean   :3.4   Mean   :8.599           
##  3rd Qu.:325.0   3rd Qu.:4.0   3rd Qu.:9.062           
##  Max.   :340.0   Max.   :5.0   Max.   :9.920

table(data$admitted)

## 
##   0   1 
## 181 219

data partitioning

set.seed(123)
pd<-sample(2,nrow(data),replace = TRUE,prob = c(0.8,0.2))
train<-data[pd==1,]
test<-data[pd==2,]

MODEL

m1<-multinom(admitted~.,data=train)

## # weights:  5 (4 variable)
## initial  value 225.272834 
## iter  10 value 155.133415
## final  value 155.131838 
## converged

m1

## Call:
## multinom(formula = admitted ~ ., data = train)
## 
## Coefficients:
## (Intercept)         gre         sop        cgpa 
## -40.1574370   0.1192646   0.4172153   0.1479590 
## 
## Residual Deviance: 310.2637 
## AIC: 318.2637

miclassification rate

p<-predict(m1,data=train)
p

##   [1] 1 1 0 1 1 0 1 1 1 0 0 1 1 1 1 1 1 1 1 0 0 0 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1
##  [38] 1 1 0 0 1 1 1 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1 0 1 1 0 0 0 0 1 1 1 1 1 1 1 0
##  [75] 0 0 0 0 0 1 1 1 1 0 1 1 1 1 0 1 0 0 1 0 0 1 1 1 0 0 0 1 1 1 1 1 0 1 1 1 0
## [112] 1 1 1 1 1 1 0 1 1 0 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 1 1 1 1 1 1 0
## [149] 0 0 1 0 1 1 1 1 1 1 0 0 0 1 1 0 1 1 0 1 0 0 0 1 1 1 1 1 1 1 1 1 1 0 0 0 0
## [186] 0 1 1 1 0 0 1 1 1 0 0 0 1 1 0 1 1 1 1 0 1 1 1 0 0 1 1 0 1 0 0 0 1 0 0 0 0
## [223] 0 1 1 0 0 1 1 1 1 1 1 1 1 1 1 0 0 0 0 1 1 0 0 1 1 1 0 1 1 1 0 0 1 1 1 0 0
## [260] 0 1 1 0 0 0 1 0 1 1 0 0 1 1 1 1 1 0 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0 1 1 0 0
## [297] 1 1 0 0 0 0 1 1 0 0 0 0 1 1 1 0 1 0 0 0 1 0 1 1 1 1 1 1 0
## Levels: 0 1

table

ta<-table(p,train$admitted)
ta

##    
## p     0   1
##   0 108  34
##   1  38 145

accuracy (correct classification)

sum(diag(ta))/sum(ta)

## [1] 0.7784615

how many admitted not admitted

table(train$admitted)

## 
##   0   1 
## 146 179

model performance evaluation

library(ROCR)
pred<-predict(m1,data,type="prob")
head(pred)

##         1         2         3         4         5         6 
## 0.9657758 0.8123789 0.4913724 0.7288536 0.3409223 0.9212373

head(data)

histogram

hist(pred)

roc_curve

pred<-prediction(pred,data$admitted)
pred

## A prediction instance
##   with 400 data points

eval<-performance(pred,"acc")
plot(eval)
abline(h=0.79,v=0.60)

identify best cut off and accuracy

max<-which.max(slot(eval,"y.values")[[1]])
acc<-slot(eval,"y.values")[[1]][max]
max

## [1] 192

acc

## [1] 0.7925

cutoff<-slot(eval,"x.values")[[1]][max]
cutoff

##       334 
## 0.5994509

print(c(accuracy=acc,cutoffp=cutoff))

##    accuracy cutoffp.334 
##   0.7925000   0.5994509

performance(pred,"tpr")

## A performance instance
##   'Cutoff' vs. 'True positive rate' (alpha: 'none')
##   with 398 data points

roc<-performance(pred,"tpr","fpr")
plot(roc)
abline(a=0,b=1)

plot(roc,colorize=T,main="Roc Curve",ylab="sensitivity",xlab="1-specificity")
abline(a=0,b=1)

are under the curve

auc<-performance(pred,"auc")
auc<-unlist(slot(auc,"y.values"))
auc

## [1] 0.8513459

auc<-round(auc,2)
auc

## [1] 0.85

#legend(.6,.2,legend="auc",title = "AUC"))

ML_ROC_CURVE

mugo_muiruri_james

2023-09-17