binary <- read_csv("C:/Users/Lenovo/Downloads/binary.csv")
## Rows: 400 Columns: 4
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## dbl (4): admit, gre, gpa, rank
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(binary) %>% gt()
| admit | gre | gpa | rank |
|---|---|---|---|
| 0 | 380 | 3.61 | 3 |
| 1 | 660 | 3.67 | 3 |
| 1 | 800 | 4.00 | 1 |
| 1 | 640 | 3.19 | 4 |
| 0 | 520 | 2.93 | 4 |
| 1 | 760 | 3.00 | 2 |
str(binary)
## spc_tbl_ [400 x 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ admit: num [1:400] 0 1 1 1 0 1 1 0 1 0 ...
## $ gre : num [1:400] 380 660 800 640 520 760 560 400 540 700 ...
## $ gpa : num [1:400] 3.61 3.67 4 3.19 2.93 3 2.98 3.08 3.39 3.92 ...
## $ rank : num [1:400] 3 3 1 4 4 2 1 2 3 2 ...
## - attr(*, "spec")=
## .. cols(
## .. admit = col_double(),
## .. gre = col_double(),
## .. gpa = col_double(),
## .. rank = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
We are studying the analysis of admitted students according to three predictors, gre,gpa and rank
mymodel <- multinom(admit~., data = binary)
## # weights: 5 (4 variable)
## initial value 277.258872
## final value 229.720883
## converged
p <- predict(mymodel, binary)
tab <- table(p,binary$admit)
accuracy <- sum(diag(tab))/sum(tab)
print(accuracy)
## [1] 0.705
** Approx 70% accuracy for this model**
MissClassifficationRate <- 1-accuracy
print(MissClassifficationRate)
## [1] 0.295
** Approx 30%**
We want to know if the model is good with this accuracy! test the benchmark number of the admitted students
table(binary$admit)
##
## 0 1
## 273 127
Do the calculation manually 273/400=0.68, so it is about 68% still less the our logistic accuracy model
library(ROCR)
pred <- predict(mymodel, binary, type='prob')
head(pred)
## 1 2 3 4 5 6
## 0.18955119 0.31778152 0.71781816 0.14894795 0.09795241 0.37867798
hist(pred)
pred <- prediction(pred, binary$admit)
eval <- performance(pred, "acc")
plot(eval)
abline(h=0.71, v=0.45)
max <- which.max(slot(eval, "y.values")[[1]])
acc <- slot(eval, "y.values")[[1]][max]
print(acc)
## [1] 0.7175
cut <- slot(eval, "x.values")[[1]][max]
print(cut)
## 158
## 0.4683497
print(c(Accuracy=acc, Cutoff= cut))
## Accuracy Cutoff.158
## 0.7175000 0.4683497
roc <- performance(pred, "tpr","fpr")
plot(roc,
colorize=T,
main="ROC Curve",
ylab="Sensitivity",
xlab="1-Specificity")
abline(a=0, b=1)
auc <- performance(pred, "auc")
auc <- unlist(slot(auc, "y.values"))
auc <- round(auc,4)
print(auc)
## [1] 0.6921