library(readr)
library(nnet)
Admission_Predict <- read_csv("C:/Users/USER/Desktop/Admission_Predict.csv")
## Rows: 400 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (4): gre, sop, cgpa, admitted
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
attach(Admission_Predict)
Admission_Predict$admitted<-as.factor(Admission_Predict$admitted)
data<-Admission_Predict
str(data)
## spc_tbl_ [400 × 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ gre : num [1:400] 337 324 316 322 314 330 321 308 302 323 ...
## $ sop : num [1:400] 4.5 4 3 3.5 2 4.5 3 3 2 3.5 ...
## $ cgpa : num [1:400] 9.65 8.87 8 8.67 8.21 9.34 8.2 7.9 8 8.6 ...
## $ admitted: Factor w/ 2 levels "0","1": 2 2 2 2 1 2 2 1 1 1 ...
## - attr(*, "spec")=
## .. cols(
## .. gre = col_double(),
## .. sop = col_double(),
## .. cgpa = col_double(),
## .. admitted = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
explore the data
summary(data)
## gre sop cgpa admitted
## Min. :290.0 Min. :1.0 Min. :6.800 0:181
## 1st Qu.:308.0 1st Qu.:2.5 1st Qu.:8.170 1:219
## Median :317.0 Median :3.5 Median :8.610
## Mean :316.8 Mean :3.4 Mean :8.599
## 3rd Qu.:325.0 3rd Qu.:4.0 3rd Qu.:9.062
## Max. :340.0 Max. :5.0 Max. :9.920
table(data$admitted)
##
## 0 1
## 181 219
data partitioning
set.seed(123)
pd<-sample(2,nrow(data),replace = TRUE,prob = c(0.8,0.2))
train<-data[pd==1,]
test<-data[pd==2,]
MODEL
m1<-multinom(admitted~.,data=train)
## # weights: 5 (4 variable)
## initial value 225.272834
## iter 10 value 155.133415
## final value 155.131838
## converged
m1
## Call:
## multinom(formula = admitted ~ ., data = train)
##
## Coefficients:
## (Intercept) gre sop cgpa
## -40.1574370 0.1192646 0.4172153 0.1479590
##
## Residual Deviance: 310.2637
## AIC: 318.2637
miclassification rate
p<-predict(m1,data=train)
p
## [1] 1 1 0 1 1 0 1 1 1 0 0 1 1 1 1 1 1 1 1 0 0 0 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1
## [38] 1 1 0 0 1 1 1 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1 0 1 1 0 0 0 0 1 1 1 1 1 1 1 0
## [75] 0 0 0 0 0 1 1 1 1 0 1 1 1 1 0 1 0 0 1 0 0 1 1 1 0 0 0 1 1 1 1 1 0 1 1 1 0
## [112] 1 1 1 1 1 1 0 1 1 0 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 1 1 1 1 1 1 0
## [149] 0 0 1 0 1 1 1 1 1 1 0 0 0 1 1 0 1 1 0 1 0 0 0 1 1 1 1 1 1 1 1 1 1 0 0 0 0
## [186] 0 1 1 1 0 0 1 1 1 0 0 0 1 1 0 1 1 1 1 0 1 1 1 0 0 1 1 0 1 0 0 0 1 0 0 0 0
## [223] 0 1 1 0 0 1 1 1 1 1 1 1 1 1 1 0 0 0 0 1 1 0 0 1 1 1 0 1 1 1 0 0 1 1 1 0 0
## [260] 0 1 1 0 0 0 1 0 1 1 0 0 1 1 1 1 1 0 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0 1 1 0 0
## [297] 1 1 0 0 0 0 1 1 0 0 0 0 1 1 1 0 1 0 0 0 1 0 1 1 1 1 1 1 0
## Levels: 0 1
table
ta<-table(p,train$admitted)
ta
##
## p 0 1
## 0 108 34
## 1 38 145
accuracy (correct classification)
sum(diag(ta))/sum(ta)
## [1] 0.7784615
how many admitted not admitted
table(train$admitted)
##
## 0 1
## 146 179
model performance evaluation
library(ROCR)
pred<-predict(m1,data,type="prob")
head(pred)
## 1 2 3 4 5 6
## 0.9657758 0.8123789 0.4913724 0.7288536 0.3409223 0.9212373
head(data)
histogram
hist(pred)
roc_curve
pred<-prediction(pred,data$admitted)
pred
## A prediction instance
## with 400 data points
eval<-performance(pred,"acc")
plot(eval)
abline(h=0.79,v=0.60)
identify best cut off and accuracy
max<-which.max(slot(eval,"y.values")[[1]])
acc<-slot(eval,"y.values")[[1]][max]
max
## [1] 192
acc
## [1] 0.7925
cutoff<-slot(eval,"x.values")[[1]][max]
cutoff
## 334
## 0.5994509
print(c(accuracy=acc,cutoffp=cutoff))
## accuracy cutoffp.334
## 0.7925000 0.5994509
performance(pred,"tpr")
## A performance instance
## 'Cutoff' vs. 'True positive rate' (alpha: 'none')
## with 398 data points
roc<-performance(pred,"tpr","fpr")
plot(roc)
abline(a=0,b=1)
plot(roc,colorize=T,main="Roc Curve",ylab="sensitivity",xlab="1-specificity")
abline(a=0,b=1)
are under the curve
auc<-performance(pred,"auc")
auc<-unlist(slot(auc,"y.values"))
auc
## [1] 0.8513459
auc<-round(auc,2)
auc
## [1] 0.85
#legend(.6,.2,legend="auc",title = "AUC"))