Data College adalah data statistik kampus-kampus di Amerika Serikat dari tahun 1995. Penelitian ini akan mengklasifikasikan kampus negeri dan kampus swasta berdasarkan variabel “Private”, dengan “No” berarti kampus negeri (bukan swasta) dan “Yes” berarti kampus swasta.
Memanggil data
library("neuralnet")
library(ISLR)
# Load the College data set from the ISLR package and keep a working copy
# under the name `data`, which the rest of the script uses.
data("College", package = "ISLR")
data <- College
# Print the first six rows as a quick look at the available columns.
head(data, n = 6L)
## Private Apps Accept Enroll Top10perc
## Abilene Christian University Yes 1660 1232 721 23
## Adelphi University Yes 2186 1924 512 16
## Adrian College Yes 1428 1097 336 22
## Agnes Scott College Yes 417 349 137 60
## Alaska Pacific University Yes 193 146 55 16
## Albertson College Yes 587 479 158 38
## Top25perc F.Undergrad P.Undergrad Outstate
## Abilene Christian University 52 2885 537 7440
## Adelphi University 29 2683 1227 12280
## Adrian College 50 1036 99 11250
## Agnes Scott College 89 510 63 12960
## Alaska Pacific University 44 249 869 7560
## Albertson College 62 678 41 13500
## Room.Board Books Personal PhD Terminal
## Abilene Christian University 3300 450 2200 70 78
## Adelphi University 6450 750 1500 29 30
## Adrian College 3750 400 1165 53 66
## Agnes Scott College 5450 450 875 92 97
## Alaska Pacific University 4120 800 1500 76 72
## Albertson College 3335 500 675 67 73
## S.F.Ratio perc.alumni Expend Grad.Rate
## Abilene Christian University 18.1 12 7041 60
## Adelphi University 12.2 16 10527 56
## Adrian College 12.9 30 8735 54
## Agnes Scott College 7.7 37 19016 59
## Alaska Pacific University 11.9 2 10922 15
## Albertson College 9.4 11 9727 55
Normalisasi data
# Min-max normalisation of the 17 numeric predictors (columns 2 to 18):
# each column is shifted by its minimum and divided by its range.
min_data <- vapply(data[, 2:18], min, numeric(1))
max_data <- vapply(data[, 2:18], max, numeric(1))
# scale() subtracts `center` and divides by `scale`, column by column.
data_scaled <- scale(
  data[, 2:18],
  center = min_data,
  scale = max_data - min_data
)
Mengubah variabel Private menjadi numerik (No = 0, Yes = 1)
# Encode the Private factor as a numeric 0/1 label (No -> 0, Yes -> 1).
Private <- as.numeric(College$Private == "Yes")
# Prepend the label as the first column of the scaled predictor matrix.
data_scaled <- cbind(Private, data_scaled)
Membuat data testing dan training
# Randomly split the observations: 70% of the rows for training, the rest for
# testing.
# Fix the RNG seed so the same split (and therefore the same downstream
# results) is obtained on every run.
set.seed(123)
index <- sample(seq_len(nrow(data)), size = round(0.70 * nrow(data)))
# data_scaled is a matrix; convert each subset to a data frame so its columns
# can be referenced by name in the model formula.
train_data <- as.data.frame(data_scaled[index, ])
test_data <- as.data.frame(data_scaled[-index, ])
Membuat model neural network MLP
# Fit a multilayer perceptron that predicts Private from the 17 scaled
# predictors, with four hidden layers of 17, 17, 10 and 5 neurons.
# linear.output = FALSE: the activation function is also applied to the
# output neuron (see the neuralnet documentation).
model.mlp <- neuralnet(
  formula = Private ~ Apps + Accept + Enroll + Top10perc + Top25perc +
    F.Undergrad + P.Undergrad + Outstate + Room.Board + Books + Personal +
    PhD + Terminal + S.F.Ratio + perc.alumni + Expend + Grad.Rate,
  data = train_data,
  hidden = c(17, 17, 10, 5),
  linear.output = FALSE
)
# Draw the fitted network.
library(NeuralNetTools)
plotnet(model.mlp)
Prediksi data testing
# Feed the test-set predictors (columns 2 to 18; column 1 is the Private
# label) through the fitted network.
predicted_data <- neuralnet::compute(
  x = model.mlp,
  covariate = test_data[, 2:18]
)
# Show the first few raw network outputs.
print(head(predicted_data[["net.result"]]))
## [,1]
## Abilene Christian University 0.5405016903
## Agnes Scott College 0.9999999966
## Albion College 0.9999999962
## Albright College 0.9999999970
## Allegheny College 0.9999999972
## Allentown Coll. of St. Francis de Sales 0.9999999908
# Round the network outputs to the nearest integer to obtain 0/1 class labels.
# round() is vectorised, so no element-wise sapply() is needed; as.vector()
# drops the matrix dimensions so the result is a plain numeric vector.
predicted_data$net.result <- as.vector(
  round(predicted_data$net.result, digits = 0)
)
print(head(predicted_data$net.result))
## [1] 1 1 1 1 1 1
Membuat confusion matrix
# Cross-tabulate predicted against actual classes.
# caret::confusionMatrix() reads the table rows as predictions and the columns
# as the reference (true) classes, so the predictions must come first;
# otherwise sensitivity/PPV and specificity/NPV are swapped in the report.
# Fixing the factor levels keeps the table 2 x 2 even if one class is never
# predicted.
tabel.confusion.matrix <- table(
  Prediction = factor(predicted_data$net.result, levels = c(0, 1)),
  Reference = factor(test_data$Private, levels = c(0, 1))
)
library(caret)
## Warning: package 'caret' was built under R version 3.5.3
## Loading required package: lattice
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.5.3
# Report accuracy, kappa, sensitivity, specificity and related statistics.
# NOTE(review): caret expects predictions in the table rows and the reference
# classes in the columns -- confirm the table was built in that orientation.
caret::confusionMatrix(data = tabel.confusion.matrix)
## Confusion Matrix and Statistics
##
##
## 0 1
## 0 57 6
## 1 8 162
##
## Accuracy : 0.9399142
## 95% CI : (0.9012404, 0.9667646)
## No Information Rate : 0.72103
## P-Value [Acc > NIR] : < 0.0000000000000002
##
## Kappa : 0.8492188
##
## Mcnemar's Test P-Value : 0.789268
##
## Sensitivity : 0.8769231
## Specificity : 0.9642857
## Pos Pred Value : 0.9047619
## Neg Pred Value : 0.9529412
## Prevalence : 0.2789700
## Detection Rate : 0.2446352
## Detection Prevalence : 0.2703863
## Balanced Accuracy : 0.9206044
##
## 'Positive' Class : 0
##