Read data file

library(tidyverse)  # read_csv() and %>%
library(gt)         # gt()
library(nnet)       # multinom()
binary <- read_csv("C:/Users/Lenovo/Downloads/binary.csv")
## Rows: 400 Columns: 4
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## dbl (4): admit, gre, gpa, rank
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(binary) %>% gt()
admit   gre    gpa   rank
    0   380   3.61      3
    1   660   3.67      3
    1   800   4.00      1
    1   640   3.19      4
    0   520   2.93      4
    1   760   3.00      2
str(binary)
## spc_tbl_ [400 x 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ admit: num [1:400] 0 1 1 1 0 1 1 0 1 0 ...
##  $ gre  : num [1:400] 380 660 800 640 520 760 560 400 540 700 ...
##  $ gpa  : num [1:400] 3.61 3.67 4 3.19 2.93 3 2.98 3.08 3.39 3.92 ...
##  $ rank : num [1:400] 3 3 1 4 4 2 1 2 3 2 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   admit = col_double(),
##   ..   gre = col_double(),
##   ..   gpa = col_double(),
##   ..   rank = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

We study whether a student is admitted (admit) as a function of three predictors: gre, gpa, and rank.
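
Logistic regression models the log-odds of admission as a linear combination of the predictors (note that rank is treated as numeric here, since it was read in as a double):

$$
\log\frac{P(\text{admit}=1)}{1-P(\text{admit}=1)} = \beta_0 + \beta_1\,\text{gre} + \beta_2\,\text{gpa} + \beta_3\,\text{rank}
$$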

Logistic Regression Model

mymodel <- multinom(admit~., data = binary)
## # weights:  5 (4 variable)
## initial  value 277.258872 
## final  value 229.720883 
## converged
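
Because admit has only two classes, multinom() here is effectively fitting an ordinary binary logistic regression. As a cross-check (a sketch, not part of the original analysis; glm_model is a new name), the same model can be fitted with glm() and should give essentially the same coefficients:

# Equivalent binary logistic regression; matches multinom() up to optimizer precision
glm_model <- glm(admit ~ ., data = binary, family = binomial)
summary(glm_model)$coefficients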

Misclassification Rate

p <- predict(mymodel, binary)            # predicted class at the default 0.5 cutoff
tab <- table(p, binary$admit)            # confusion matrix: rows = predicted, columns = actual
accuracy <- sum(diag(tab))/sum(tab)      # proportion correctly classified
print(accuracy)
## [1] 0.705

**Approx. 70% accuracy for this model**

misclassification_rate <- 1 - accuracy
print(misclassification_rate)
## [1] 0.295

**Approx. 30% misclassification rate**
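
Overall accuracy hides how the errors are split between the two classes. As a quick sketch using the confusion matrix tab built above (rows = predicted, columns = actual), the per-class error rates are:

# Error rate per actual class: 1 - correctly classified / actual count
1 - diag(tab) / colSums(tab)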

We want to know whether this accuracy is actually good. Test it against a benchmark: the accuracy of always predicting the majority class of admit.

table(binary$admit)
## 
##   0   1 
## 273 127

Doing the calculation manually, 273/400 = 0.68, so always predicting "not admitted" is already about 68% accurate; the logistic model's 70.5% is only slightly better.
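
The same benchmark can be computed directly (a small sketch; baseline is a new name introduced here):

# Accuracy of always predicting the majority class (0 = not admitted)
baseline <- max(table(binary$admit)) / nrow(binary)
print(baseline)
## [1] 0.6825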

Model Performance Evaluation

library(ROCR)
pred <- predict(mymodel, binary, type='prob')   # predicted probability of admit = 1
head(pred)
##          1          2          3          4          5          6 
## 0.18955119 0.31778152 0.71781816 0.14894795 0.09795241 0.37867798
hist(pred)

pred <- prediction(pred, binary$admit)   # ROCR prediction object from probabilities and labels
eval <- performance(pred, "acc")         # accuracy at every possible cutoff
plot(eval)
abline(h=0.71, v=0.45)                   # rough visual guide to the best accuracy and cutoff

Identifying best cutoff and accuracy values

best <- which.max(slot(eval, "y.values")[[1]])    # index of the highest accuracy
acc <- slot(eval, "y.values")[[1]][best]          # best accuracy
print(acc)
## [1] 0.7175
cutoff <- slot(eval, "x.values")[[1]][best]       # probability cutoff giving that accuracy
print(cutoff)
##       158 
## 0.4683497
print(c(Accuracy=acc, Cutoff=cutoff))
##   Accuracy Cutoff.158 
##  0.7175000  0.4683497
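
Re-classifying with this cutoff instead of the default 0.5 should reproduce the 0.7175 accuracy (a sketch; prob, p_best and tab_best are new names introduced here):

prob <- predict(mymodel, binary, type='prob')                # fitted probabilities of admit = 1
p_best <- ifelse(prob >= cutoff, 1, 0)                       # classify with the best cutoff
tab_best <- table(Predicted = p_best, Actual = binary$admit)
sum(diag(tab_best)) / sum(tab_best)                          # expected to be about 0.7175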

Receiver Operating Characteristic (ROC) Curve

roc <- performance(pred, "tpr","fpr")
plot(roc,
     colorize = TRUE,
     main = "ROC Curve",
     ylab = "Sensitivity",
     xlab = "1 - Specificity")
abline(a=0, b=1)

Area Under the Curve (AUC)

auc <- performance(pred, "auc")
auc <- unlist(slot(auc, "y.values"))
auc <- round(auc,4)
print(auc)
## [1] 0.6921
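
An AUC of about 0.69 indicates modest discriminative power. It can be read as the probability that a randomly chosen admitted student receives a higher predicted probability than a randomly chosen non-admitted one. A quick sanity check of that interpretation (a sketch; prob, adm and non are new names introduced here):

prob <- predict(mymodel, binary, type='prob')
adm <- prob[binary$admit == 1]
non <- prob[binary$admit == 0]
# Concordance: P(score of admitted > score of non-admitted), ties counted as 1/2
mean(outer(adm, non, ">")) + 0.5 * mean(outer(adm, non, "=="))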