package yang dibutuhkan:
Library
library(tidyverse)
library(kableExtra)
library(ggplot2)
library(ggthemes)
library(stringr)
library(reshape2)
library(mice)
library(nortest)
library(DescTools)
library(caret)
library(rpart)
library(rpart.plot)
library(ROCit)
library(PRROC)
library(ROCR)
library(vip)
library(googlesheets4)
Data
Data Formatting
# Convert the four categorical columns to factors in one pass
kolom_faktor <- c("Jenjang", "Departemen", "Akreditasi_Dept", "Dept_Jenjang")
df_authors_ok[kolom_faktor] <- lapply(df_authors_ok[kolom_faktor], as.factor)
# Structure of the converted columns after formatting
glimpse(df_authors_ok[, kolom_faktor])
## Rows: 1,113
## Columns: 4
## $ Jenjang <fct> S3, S1, S2, S3, S2, S3, S3, S1, S2, S2, S2, S2, S2, S1~
## $ Departemen <fct> Ilmu Komputer, Teknik Komputer, Teknik Informatika, Il~
## $ Akreditasi_Dept <fct> Baik Sekali, Unggul, Unggul, Baik Sekali, Unggul, Ungg~
## $ Dept_Jenjang <fct> Ilmu Komputer - S3, Teknik Komputer - S1, Teknik Infor~
Check Jenjang Factor
# Level order of the Jenjang factor as initially encoded
levels(df_authors_ok$Jenjang) # initial Jenjang levels
## [1] "D3" "D4" "S1" "S2" "S3"
# Level order of the department accreditation factor
levels(df_authors_ok$Akreditasi_Dept)
## [1] "Baik" "Baik Sekali" "Unggul"
Rumpun Ilmu dari Prodi
# Build a lookup of the distinct (Departemen_ID, Departemen) pairs and derive
# the first two characters of the programme code, which are used later to
# assign the field of study (rumpun ilmu).
df_rumpun <- df_authors_ok %>%
  select(Departemen_ID, Departemen) %>%
  group_by(Departemen_ID, Departemen) %>%
  # .groups = "drop" returns an ungrouped table: no grouping lingers into the
  # later steps and summarise() no longer prints its "grouped output" message.
  summarize(.groups = "drop") %>%
  mutate(Departemen_ID_2Digit = substr(Departemen_ID, 1, 2)) %>%
  # Discard pairs with a missing ID or department name
  na.omit()
## `summarise()` has grouped output by 'Departemen_ID'. You can override using the
## `.groups` argument.
# Show the lookup before the field of study is attached
df_rumpun
# Assign the field of study (Rumpun_Ilmu) from the 2-digit programme code.
# Codes that match none of the rules below receive NA.
df_rumpun <- df_rumpun %>%
  mutate(
    Rumpun_Ilmu = case_when(
      Departemen_ID_2Digit %in% c(11) ~ "Kesehatan",
      Departemen_ID_2Digit %in% c(20:38, 54:59) ~ "Teknik",
      Departemen_ID_2Digit %in% c(44:51, 94) ~ "MIPA",
      Departemen_ID_2Digit %in% c(61:63, 93) ~ "Ekonomi",
      Departemen_ID_2Digit %in% c(60) ~ "Sosial Humaniora",
      Departemen_ID_2Digit %in% c(41) ~ "Pertanian, Perikanan, dan Ternak",
      Departemen_ID_2Digit %in% c(90) ~ "Seni, Desain, Media"
    )
  )
df_rumpun
# Field-of-study lookup that is merged back into the author data
df_rumpun_oke <- select(df_rumpun, Departemen_ID, Rumpun_Ilmu)
Analisis
# Data structure: print column names, types and leading values of the
# full author table
glimpse(df_authors_ok)
## Rows: 1,113
## Columns: 37
## $ SINTA_ID <dbl> 29555, 6005015, 5976088, 29560, 5977802,~
## $ Departemen_ID <dbl> 55001, 90243, 55101, 55001, 49101, 24001~
## $ Nama <chr> "RIYANARTO SARNO", "MAURIDHI HERY PURNOM~
## $ Universitas <chr> "Institut Teknologi Sepuluh Nopember", "~
## $ Departemen <fct> Ilmu Komputer, Teknik Komputer, Teknik I~
## $ Jenjang <fct> S3, S1, S2, S3, S2, S3, S3, S1, S2, S2, ~
## $ Dept_Jenjang <fct> Ilmu Komputer - S3, Teknik Komputer - S1~
## $ SINTA_Score_Overall <dbl> 9134, 8887, 4696, 4070, 3310, 4195, 4049~
## $ SINTA_Score_3Yr <dbl> 3068, 2544, 1881, 1624, 1496, 1316, 1279~
## $ Scopus_Artikel <dbl> 346, 444, 155, 139, 101, 103, 174, 128, ~
## $ Scopus_Citation <dbl> 2616, 2187, 657, 903, 313, 728, 935, 347~
## $ Scopus_Cited_Document <dbl> 279, 333, 107, 94, 68, 85, 132, 84, 122,~
## $ Scopus_H_Index <dbl> 26, 19, 14, 15, 9, 14, 17, 8, 17, 10, 15~
## $ Scopus_i10_Index <dbl> 85, 60, 22, 25, 9, 19, 28, 6, 39, 10, 25~
## $ Scopus_G_Index <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 2, 1, 1, 1~
## $ GScholar_Artikel <dbl> 520, 712, 296, 196, 168, 214, 240, 211, ~
## $ GScholar_Citation <dbl> 4797, 4228, 1487, 1363, 701, 1526, 1425,~
## $ GScholar_Cited_Document <dbl> 365, 487, 208, 121, 104, 144, 173, 122, ~
## $ GScholar_H_Index <dbl> 32, 27, 20, 17, 14, 20, 19, 12, 23, 13, ~
## $ GScholar_i10_Index <dbl> 145, 126, 49, 31, 25, 43, 43, 17, 67, 25~
## $ GScholar_G_Index <dbl> 1, 1, 1, 1, 3, 1, 2, 1, 4, 1, 1, 2, 1, 1~
## $ WOS_Artikel <dbl> 107, 0, 61, 72, 49, 32, 0, 0, 0, 46, 91,~
## $ WOS_Citation <dbl> 570, 0, 259, 363, 135, 369, 0, 0, 0, 174~
## $ WOS_Cited_Document <dbl> 88, 0, 46, 40, 30, 31, 0, 0, 0, 36, 70, ~
## $ WOS_H_Index <dbl> 13, NA, 8, 9, 7, 11, NA, NA, NA, 6, 14, ~
## $ WOS_i10_Index <dbl> 17, NA, 7, 9, 3, 13, NA, NA, NA, 4, 21, ~
## $ `WOS_G-Index` <dbl> 1, NA, 1, 1, 4, 14, NA, NA, NA, 4, 2, 1,~
## $ SINTA_Score_Productivity_Dept <dbl> 9275, 1860, 4025, 9275, 3199, 3518, 2002~
## $ Akreditasi_Dept <fct> Baik Sekali, Unggul, Unggul, Baik Sekali~
## $ Jumlah_Dosen_Total <dbl> 6, 18, 6, 6, 7, 6, 8, 18, 7, 7, 6, 6, 6,~
## $ Jumlah_Mahasiswa <dbl> 68, 324, 115, 68, 107, 35, 38, 324, 107,~
## $ Rasio_Dosen_per_Mahasiswa <dbl> 0.2500000, 0.1912046, 0.1912046, 0.25000~
## $ STATUS_i10index_3sources <chr> "EXCELLENT", "EXCELLENT", "EXCELLENT", "~
## $ `STATUS_g-index_Scopus` <chr> "NEUTRAL", "NEUTRAL", "NEUTRAL", "NEUTRA~
## $ `STATUS_g-index_gscholar` <chr> "NEUTRAL", "NEUTRAL", "NEUTRAL", "NEUTRA~
## $ `STATUS_H-index_Scopus` <chr> "SUCCESSFUL", "UNKNOWN", "UNKNOWN", "UNK~
## $ `STATUS_H-index_Gscholar` <chr> "SUCCESSFUL", "SUCCESSFUL", "SUCCESSFUL"~
Data
Unit Observasi = Authors
y = SINTA_Score_3Yr yang dikategorisasi menjadi tinggi
dan rendah
x1 = Rumpun Ilmu (Ganjil 2021)
x2 = Jenjang (Ganjil 2021)
x3 = Akreditasi_Dept (Ganjil 2021)
x4 = Total Jumlah Dosen (Ganjil 2021)
x5 = Jumlah Mahasiswa (Ganjil 2021)
x6 = Rasio Dosen per Mahasiswa (Ganjil 2021)
# Attach the field of study, keep the score and the candidate predictors,
# then derive the binary response y.
data_1 <- df_authors_ok %>%
  left_join(df_rumpun_oke, by = "Departemen_ID") %>%
  select(
    SINTA_Score_3Yr, Departemen, Rumpun_Ilmu, Jenjang, Akreditasi_Dept,
    Jumlah_Dosen_Total, Jumlah_Mahasiswa, Rasio_Dosen_per_Mahasiswa
  ) %>%
  mutate(
    # class "1": high SINTA_Score_3Yr (score >= 256); class "0" otherwise
    y = as.factor(ifelse(SINTA_Score_3Yr >= 256, "1", "0")),
    Rumpun_Ilmu = as.factor(Rumpun_Ilmu)
  )
str(data_1)
## tibble [1,113 x 9] (S3: tbl_df/tbl/data.frame)
## $ SINTA_Score_3Yr : num [1:1113] 3068 2544 1881 1624 1496 ...
## $ Departemen : Factor w/ 54 levels "Arsitektur","Biologi",..: 10 36 33 10 23 35 8 36 23 23 ...
## $ Rumpun_Ilmu : Factor w/ 7 levels "Ekonomi","Kesehatan",..: 7 5 7 7 3 7 3 5 3 3 ...
## $ Jenjang : Factor w/ 5 levels "D3","D4","S1",..: 5 3 4 5 4 5 5 3 4 4 ...
## $ Akreditasi_Dept : Factor w/ 3 levels "Baik","Baik Sekali",..: 2 3 3 2 3 3 3 3 3 3 ...
## $ Jumlah_Dosen_Total : num [1:1113] 6 18 6 6 7 6 8 18 7 7 ...
## $ Jumlah_Mahasiswa : num [1:1113] 68 324 115 68 107 35 38 324 107 107 ...
## $ Rasio_Dosen_per_Mahasiswa: num [1:1113] 0.25 0.191 0.191 0.25 0.187 ...
## $ y : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
# Check missing values: tabulate the missing-data patterns of data_1
mice::md.pattern(x = data_1, rotate.names = TRUE)
## SINTA_Score_3Yr y Departemen Rumpun_Ilmu Jenjang Jumlah_Dosen_Total
## 981 1 1 1 1 1 1
## 44 1 1 1 1 1 1
## 31 1 1 1 1 1 1
## 6 1 1 1 1 1 1
## 51 1 1 0 0 0 0
## 0 0 51 51 51 51
## Jumlah_Mahasiswa Rasio_Dosen_per_Mahasiswa Akreditasi_Dept
## 981 1 1 1 0
## 44 1 1 0 1
## 31 1 0 1 1
## 6 1 0 0 2
## 51 0 0 0 7
## 51 88 101 444
# Keep only rows usable for modelling.
# is.finite() replaces the original `!= Inf` test: that comparison removed
# missing ratios only as a side effect (filter() drops rows whose condition
# is NA) and would have let -Inf through. is.finite() rejects NA, NaN, Inf
# and -Inf explicitly.
data_1 <- data_1 %>%
  filter(
    !is.na(Departemen),
    !is.na(Jenjang),
    !is.na(Akreditasi_Dept),
    is.finite(Rasio_Dosen_per_Mahasiswa)
  )
head(data_1) # data that will be used
EDA Data
Peubah Respon (y)
# Count the authors per response class and convert the counts to percentages
data_chart <- data_1 %>%
  count(y, name = "value") %>%
  mutate(prop = round(value / sum(value) * 100, digits = 2))
# Pie chart: distribution of authors by SINTA_Score_3Yr category
ggplot(data_chart, aes(x = "", y = prop, fill = y)) +
  geom_col(width = 1, color = "white") +
  coord_polar(theta = "y", start = 0) +
  labs(
    title = "Proporsi Authors Menurut Kategori SINTA_Score_3Yr",
    subtitle = "Institut Teknologi Sepuluh Nopember"
  ) +
  theme_void()
Peubah Prediktor (X) Numerik
# Density histogram of Jumlah_Dosen_Total.
# geom_histogram() draws counts by default, which contradicted the "Density"
# axis label; mapping y to after_stat(density) makes the axis match its label.
ggplot(data_1, aes(x = Jumlah_Dosen_Total, y = after_stat(density))) +
  geom_histogram(fill = "#69b3a2", color = "#e9ecef", alpha = 0.8, bins = 15) +
  theme_light() +
  labs(
    x = "Jumlah_Dosen_Total",
    y = "Density",
    title = "Sebaran Jumlah_Dosen_Total",
    subtitle = "Institut Teknologi Sepuluh Nopember"
  )
# Density histogram of Jumlah_Mahasiswa.
# geom_histogram() draws counts by default, which contradicted the "Density"
# axis label; mapping y to after_stat(density) makes the axis match its label.
ggplot(data_1, aes(x = Jumlah_Mahasiswa, y = after_stat(density))) +
  geom_histogram(fill = "#69b3a2", color = "#e9ecef", alpha = 0.8, bins = 15) +
  theme_light() +
  labs(
    x = "Jumlah_Mahasiswa",
    y = "Density",
    title = "Sebaran Jumlah_Mahasiswa",
    subtitle = "Institut Teknologi Sepuluh Nopember"
  )
# Density histogram of Rasio_Dosen_per_Mahasiswa.
# geom_histogram() draws counts by default, which contradicted the "Density"
# axis label; mapping y to after_stat(density) makes the axis match its label.
ggplot(data_1, aes(x = Rasio_Dosen_per_Mahasiswa, y = after_stat(density))) +
  geom_histogram(fill = "#69b3a2", color = "#e9ecef", alpha = 0.8, bins = 20) +
  theme_light() +
  labs(
    x = "Rasio_Dosen_per_Mahasiswa",
    y = "Density",
    title = "Sebaran Rasio_Dosen_per_Mahasiswa",
    subtitle = "Institut Teknologi Sepuluh Nopember"
  )
Peubah Prediktor (X) Kategorik
# Akreditasi_Dept: number of authors per accreditation level
data_bar_chart <- count(data_1, Akreditasi_Dept, name = "Jumlah")
ggplot(data_bar_chart, aes(x = Akreditasi_Dept, y = Jumlah)) +
  geom_col(color = "steelblue", fill = "#69b3a2") +
  theme_light() +
  labs(
    x = "",
    y = "",
    title = "Sebaran Akreditasi_Dept",
    subtitle = "Institut Teknologi Sepuluh Nopember"
  ) +
  coord_flip()
# Jenjang: number of authors per degree level
data_bar_chart <- count(data_1, Jenjang, name = "Jumlah")
ggplot(data_bar_chart, aes(x = Jenjang, y = Jumlah)) +
  geom_col(fill = "#69b3a2", color = "steelblue") +
  theme_light() +
  labs(
    x = "",
    y = "",
    title = "Sebaran Jenjang",
    subtitle = "Institut Teknologi Sepuluh Nopember"
  ) +
  coord_flip()
# Rumpun Ilmu: number of authors per field of study
data_bar_chart <- count(data_1, Rumpun_Ilmu, name = "Jumlah")
ggplot(data_bar_chart, aes(x = Rumpun_Ilmu, y = Jumlah)) +
  geom_col(fill = "#69b3a2", color = "steelblue") +
  theme_light() +
  labs(
    x = "",
    y = "",
    title = "Sebaran Rumpun_Ilmu",
    subtitle = "Institut Teknologi Sepuluh Nopember"
  ) +
  coord_flip()
Hubungan Peubah Prediktor dengan Peubah Respon
# Akreditasi_Dept & y: share of each response class within every
# accreditation level (percent labels are computed per level)
percentData <- data_1 %>%
  count(Akreditasi_Dept, y) %>%
  group_by(Akreditasi_Dept) %>%
  mutate(ratio = scales::percent(n / sum(n)))
ggplot(data_1, aes(x = factor(Akreditasi_Dept), fill = y)) +
  geom_bar(position = "fill") +
  scale_fill_manual(values = c("#7be217", "#4f58ab")) +
  geom_text(
    data = percentData,
    aes(y = n, label = ratio),
    color = "white",
    position = position_fill(vjust = 0.5)
  ) +
  labs(
    y = "",
    x = "Akreditasi_Dept",
    subtitle = "Institut Teknologi Sepuluh Nopember",
    title = "Proporsi Peubah Respon Menurut Akreditasi_Dept"
  )
# Jenjang & y: share of each response class within every degree level
# (percent labels are computed per level)
percentData <- data_1 %>%
  count(Jenjang, y) %>%
  group_by(Jenjang) %>%
  mutate(ratio = scales::percent(n / sum(n)))
ggplot(data_1, aes(x = factor(Jenjang), fill = y)) +
  geom_bar(position = "fill") +
  scale_fill_manual(values = c("#7be217", "#4f58ab")) +
  geom_text(
    data = percentData,
    aes(y = n, label = ratio),
    color = "white",
    position = position_fill(vjust = 0.5)
  ) +
  labs(
    y = "",
    x = "Jenjang",
    subtitle = "Institut Teknologi Sepuluh Nopember",
    title = "Proporsi Peubah Respon Menurut Jenjang"
  )
# Rumpun_Ilmu & y: share of each response class within every field of study
# (percent labels are computed per field)
percentData <- data_1 %>%
  count(Rumpun_Ilmu, y) %>%
  group_by(Rumpun_Ilmu) %>%
  mutate(ratio = scales::percent(n / sum(n)))
ggplot(data_1, aes(x = factor(Rumpun_Ilmu), fill = y)) +
  geom_bar(position = "fill") +
  scale_fill_manual(values = c("#7be217", "#4f58ab")) +
  geom_text(
    data = percentData,
    aes(y = n, label = ratio),
    color = "white",
    position = position_fill(vjust = 0.5)
  ) +
  labs(
    y = "",
    x = "Rumpun_Ilmu",
    subtitle = "Institut Teknologi Sepuluh Nopember",
    title = "Proporsi Peubah Respon Menurut Rumpun Ilmu"
  )
# Jumlah_Mahasiswa & y
# Box plot of the predictor per response class. The fill and alpha mappings
# to the continuous predictor were dropped: geom_boxplot() sets both to
# constants, which overrode the mappings anyway.
ggplot(data_1, aes(y = y, x = Jumlah_Mahasiswa)) +
  geom_boxplot(fill = "#69b3a2", alpha = 0.8) +
  theme_light() +
  labs(
    x = "Jumlah_Mahasiswa",
    y = "y",
    title = "Sebaran Jumlah Mahasiswa Menurut Peubah Respon",
    subtitle = "Institut Teknologi Sepuluh Nopember"
  )
# Jumlah_Dosen_Total & y
# Box plot of the predictor per response class. The fill and alpha mappings
# to the continuous predictor were dropped: geom_boxplot() sets both to
# constants, which overrode the mappings anyway.
ggplot(data_1, aes(y = y, x = Jumlah_Dosen_Total)) +
  geom_boxplot(fill = "#69b3a2", alpha = 0.8) +
  theme_light() +
  labs(
    x = "Jumlah_Dosen_Total",
    y = "y",
    title = "Sebaran Jumlah Dosen Total Menurut Peubah Respon",
    subtitle = "Institut Teknologi Sepuluh Nopember"
  )
# Rasio_Dosen_per_Mahasiswa & y
# Box plot of the predictor per response class. The fill and alpha mappings
# to the continuous predictor were dropped: geom_boxplot() sets both to
# constants, which overrode the mappings anyway.
ggplot(data_1, aes(y = y, x = Rasio_Dosen_per_Mahasiswa)) +
  geom_boxplot(fill = "#69b3a2", alpha = 0.8) +
  theme_light() +
  labs(
    x = "Rasio_Dosen_per_Mahasiswa",
    y = "y",
    title = "Sebaran Rasio Dosen per Mahasiswa Menurut Peubah Respon",
    subtitle = "Institut Teknologi Sepuluh Nopember"
  )
Data Model
# Modelling data: drop the raw score (y is derived from it) and the
# department name
data_sinta <- select(data_1, -SINTA_Score_3Yr, -Departemen)
str(data_sinta)
## tibble [981 x 7] (S3: tbl_df/tbl/data.frame)
## $ Rumpun_Ilmu : Factor w/ 7 levels "Ekonomi","Kesehatan",..: 7 5 7 7 3 7 3 5 3 3 ...
## $ Jenjang : Factor w/ 5 levels "D3","D4","S1",..: 5 3 4 5 4 5 5 3 4 4 ...
## $ Akreditasi_Dept : Factor w/ 3 levels "Baik","Baik Sekali",..: 2 3 3 2 3 3 3 3 3 3 ...
## $ Jumlah_Dosen_Total : num [1:981] 6 18 6 6 7 6 8 18 7 7 ...
## $ Jumlah_Mahasiswa : num [1:981] 68 324 115 68 107 35 38 324 107 107 ...
## $ Rasio_Dosen_per_Mahasiswa: num [1:981] 0.25 0.191 0.191 0.25 0.187 ...
## $ y : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
Splitting Data
set.seed(478)
# Partition the rows 70/30 on the response. `list = FALSE` replaces `list = F`
# (F is an ordinary variable that can be reassigned), and the redundant
# as.factor() is removed because data_sinta$y is already a factor.
in.train <- createDataPartition(data_sinta$y, p = 0.7, list = FALSE)
data_sinta_train <- data_sinta[in.train, ] # training data for modelling
data_sinta_test <- data_sinta[-in.train, ] # test data for model evaluation
# Class proportions of the response in the training set
round(prop.table(table(data_sinta_train$y)), digits = 4)
##
## 0 1
## 0.738 0.262
# Class proportions of the response in the test set
round(prop.table(table(data_sinta_test$y)), digits = 4)
##
## 0 1
## 0.7381 0.2619
Regresi Logistik
Semua Peubah
# Logistic regression of y on all predictors, fitted on the training data
model_reglog_1 <- glm(
  formula = y ~ .,
  family = binomial(),
  data = data_sinta_train
)
summary(model_reglog_1)
##
## Call:
## glm(formula = y ~ ., family = binomial(), data = data_sinta_train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6651 -0.6778 -0.5008 0.8076 2.4864
##
## Coefficients:
## Estimate Std. Error z value
## (Intercept) -3.9719706 0.9823735 -4.043
## Rumpun_IlmuKesehatan 1.4100554 1.1627189 1.213
## Rumpun_IlmuMIPA 1.4284822 0.8015359 1.782
## Rumpun_IlmuPertanian, Perikanan, dan Ternak 2.0519434 2.1802276 0.941
## Rumpun_IlmuSeni, Desain, Media 0.6886598 0.9451778 0.729
## Rumpun_IlmuSosial Humaniora 0.7492052 1.4772318 0.507
## Rumpun_IlmuTeknik 0.7592913 0.7457920 1.018
## JenjangS1 0.4252569 0.5984613 0.711
## JenjangS2 2.4053081 0.6618051 3.634
## JenjangS3 3.0121964 0.6671171 4.515
## Akreditasi_DeptBaik Sekali 0.5947276 0.4869130 1.221
## Akreditasi_DeptUnggul 0.2623166 0.4418709 0.594
## Jumlah_Dosen_Total -0.0378814 0.0217827 -1.739
## Jumlah_Mahasiswa 0.0024493 0.0008421 2.908
## Rasio_Dosen_per_Mahasiswa 1.3038436 0.9184191 1.420
## Pr(>|z|)
## (Intercept) 5.27e-05 ***
## Rumpun_IlmuKesehatan 0.225236
## Rumpun_IlmuMIPA 0.074720 .
## Rumpun_IlmuPertanian, Perikanan, dan Ternak 0.346623
## Rumpun_IlmuSeni, Desain, Media 0.466244
## Rumpun_IlmuSosial Humaniora 0.612037
## Rumpun_IlmuTeknik 0.308630
## JenjangS1 0.477342
## JenjangS2 0.000279 ***
## JenjangS3 6.32e-06 ***
## Akreditasi_DeptBaik Sekali 0.221925
## Akreditasi_DeptUnggul 0.552746
## Jumlah_Dosen_Total 0.082024 .
## Jumlah_Mahasiswa 0.003633 **
## Rasio_Dosen_per_Mahasiswa 0.155706
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 790.25 on 686 degrees of freedom
## Residual deviance: 658.43 on 672 degrees of freedom
## AIC: 688.43
##
## Number of Fisher Scoring iterations: 5
# Predictions on the training data
prediksi_prob_data_train <- predict(model_reglog_1, data_sinta_train, type = "response")
# Classify at the 0.5 cut-off. The levels are taken from the observed response
# so the factor keeps both classes even if every probability falls on one side
# of the cut-off (as.factor() would then produce a single-level factor).
prediksi_data_train <- factor(
  ifelse(prediksi_prob_data_train > 0.5, "1", "0"),
  levels = levels(data_sinta_train$y)
)
# Confusion matrix with "1" as the positive class
eval_reglog_1_train <- caret::confusionMatrix(prediksi_data_train, data_sinta_train$y, positive = "1")
eval_reglog_1_train
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 461 102
## 1 46 78
##
## Accuracy : 0.7846
## 95% CI : (0.7519, 0.8148)
## No Information Rate : 0.738
## P-Value [Acc > NIR] : 0.002697
##
## Kappa : 0.3808
##
## Mcnemar's Test P-Value : 6.156e-06
##
## Sensitivity : 0.4333
## Specificity : 0.9093
## Pos Pred Value : 0.6290
## Neg Pred Value : 0.8188
## Prevalence : 0.2620
## Detection Rate : 0.1135
## Detection Prevalence : 0.1805
## Balanced Accuracy : 0.6713
##
## 'Positive' Class : 1
##
Sensitivity: kemampuan model dalam memprediksi kelas positif
Specificity: kemampuan model dalam memprediksi kelas negatif
# Predictions on the test data
prediksi_prob_data_test <- predict(model_reglog_1, data_sinta_test, type = "response")
# Classify at the 0.5 cut-off. The levels are taken from the observed response
# so the factor keeps both classes even if every probability falls on one side
# of the cut-off (as.factor() would then produce a single-level factor).
prediksi_data_test <- factor(
  ifelse(prediksi_prob_data_test > 0.5, "1", "0"),
  levels = levels(data_sinta_test$y)
)
# Confusion matrix with "1" as the positive class
eval_reglog_1 <- caret::confusionMatrix(prediksi_data_test, data_sinta_test$y, positive = "1")
eval_reglog_1
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 190 53
## 1 27 24
##
## Accuracy : 0.7279
## 95% CI : (0.6732, 0.7779)
## No Information Rate : 0.7381
## P-Value [Acc > NIR] : 0.681686
##
## Kappa : 0.2102
##
## Mcnemar's Test P-Value : 0.005189
##
## Sensitivity : 0.31169
## Specificity : 0.87558
## Pos Pred Value : 0.47059
## Neg Pred Value : 0.78189
## Prevalence : 0.26190
## Detection Rate : 0.08163
## Detection Prevalence : 0.17347
## Balanced Accuracy : 0.59363
##
## 'Positive' Class : 1
##
Performa model pada data training dan data testing perlu diperhatikan untuk mengetahui adanya overfitting/underfitting
Overfitting terjadi ketika performa model pada data training jauh lebih tinggi jika dibandingkan dengan performa model pada data testing (mempelajari data terlalu baik)
Underfitting terjadi ketika performa model rendah baik pada data training maupun pada data testing (tidak mempelajari data dengan baik)
# Draw a ROC curve and show the AUC as the plot title.
#
# pred  : numeric scores or probabilities for the positive class
# truth : observed class labels, same length as pred
# ...   : further graphical arguments passed on to plot()
#         (previously accepted but silently ignored)
rocplot <- function(pred, truth, ...) {
  predob <- ROCR::prediction(pred, truth)
  perf <- ROCR::performance(predob, "tpr", "fpr")
  # y.values is a list with one element per run; take the single AUC value
  # so the title receives a plain number instead of a list.
  auc <- ROCR::performance(predob, "auc")@y.values[[1]]
  plot(perf, main = auc, ...)
}
# ROC curve on the training data
rocplot(pred = prediksi_prob_data_train, truth = data_sinta_train$y)
# ROC curve on the test data
rocplot(pred = prediksi_prob_data_test, truth = data_sinta_test$y)
# Variable importance of the full logistic model
vip::vip(model_reglog_1, num_features = 50)
Seleksi Peubah
# Reduced logistic regression with the selected predictors.
# The model is fitted on the training partition only. The original call used
# the full data_sinta, so the rows of data_sinta_test took part in the fit and
# the test-set evaluation of this model was optimistic. The recorded output
# below was produced with the full data set and must be regenerated.
model_reglog_2 <- glm(
  y ~ Jenjang + Jumlah_Mahasiswa,
  data = data_sinta_train,
  family = binomial()
)
summary(model_reglog_2)
##
## Call:
## glm(formula = y ~ Jenjang + Jumlah_Mahasiswa, family = binomial(),
## data = data_sinta)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.3486 -0.6552 -0.6293 1.0365 2.2800
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.6209935 0.3946140 -6.642 3.10e-11 ***
## JenjangS1 0.9370228 0.3889559 2.409 0.016 *
## JenjangS2 2.6796420 0.4140474 6.472 9.68e-11 ***
## JenjangS3 2.9373374 0.4403801 6.670 2.56e-11 ***
## Jumlah_Mahasiswa 0.0003598 0.0003758 0.957 0.338
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1128.38 on 980 degrees of freedom
## Residual deviance: 994.74 on 976 degrees of freedom
## AIC: 1004.7
##
## Number of Fisher Scoring iterations: 5
# Predictions on the training data
prediksi_prob_data_train <- predict(model_reglog_2, data_sinta_train, type = "response")
# Classify at the 0.5 cut-off. The levels are taken from the observed response
# so the factor keeps both classes even if every probability falls on one side
# of the cut-off (as.factor() would then produce a single-level factor).
prediksi_data_train <- factor(
  ifelse(prediksi_prob_data_train > 0.5, "1", "0"),
  levels = levels(data_sinta_train$y)
)
# Confusion matrix with "1" as the positive class
eval_reglog_2_train <- caret::confusionMatrix(prediksi_data_train, data_sinta_train$y, positive = "1")
eval_reglog_2_train
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 432 83
## 1 75 97
##
## Accuracy : 0.77
## 95% CI : (0.7367, 0.801)
## No Information Rate : 0.738
## P-Value [Acc > NIR] : 0.0298
##
## Kappa : 0.3966
##
## Mcnemar's Test P-Value : 0.5776
##
## Sensitivity : 0.5389
## Specificity : 0.8521
## Pos Pred Value : 0.5640
## Neg Pred Value : 0.8388
## Prevalence : 0.2620
## Detection Rate : 0.1412
## Detection Prevalence : 0.2504
## Balanced Accuracy : 0.6955
##
## 'Positive' Class : 1
##
# ROC curve on the training data
rocplot(pred = prediksi_prob_data_train, truth = data_sinta_train$y)
# Predictions on the test data
prediksi_prob_data_test <- predict(model_reglog_2, data_sinta_test, type = "response")
# Classify at the 0.5 cut-off. The levels are taken from the observed response
# so the factor keeps both classes even if every probability falls on one side
# of the cut-off (as.factor() would then produce a single-level factor).
prediksi_data_test <- factor(
  ifelse(prediksi_prob_data_test > 0.5, "1", "0"),
  levels = levels(data_sinta_test$y)
)
# Confusion matrix with "1" as the positive class
eval_reglog_2 <- caret::confusionMatrix(prediksi_data_test, data_sinta_test$y, positive = "1")
eval_reglog_2
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 186 44
## 1 31 33
##
## Accuracy : 0.7449
## 95% CI : (0.691, 0.7937)
## No Information Rate : 0.7381
## P-Value [Acc > NIR] : 0.4251
##
## Kappa : 0.3022
##
## Mcnemar's Test P-Value : 0.1659
##
## Sensitivity : 0.4286
## Specificity : 0.8571
## Pos Pred Value : 0.5156
## Neg Pred Value : 0.8087
## Prevalence : 0.2619
## Detection Rate : 0.1122
## Detection Prevalence : 0.2177
## Balanced Accuracy : 0.6429
##
## 'Positive' Class : 1
##
# ROC curve on the test data
rocplot(pred = prediksi_prob_data_test, truth = data_sinta_test$y)
# Variable importance of the reduced logistic model
vip::vip(model_reglog_2, num_features = 50)
Classification Tree
Model 1 Default
Model dengan hyperparameter
minsplit default (20) dan cp = 0 (catatan: nilai default cp pada rpart adalah 0.01)
# Classification tree on the training data with minsplit = 20 and cp = 0
model_tree_1 <- rpart(
  formula = y ~ .,
  data = data_sinta_train,
  method = "class",
  control = rpart.control(cp = 0, minsplit = 20)
)
rpart.plot(model_tree_1, extra = 4)
# Predictions on the training data: class probabilities and class labels
prediksi_prob_data_train <- predict(model_tree_1, newdata = data_sinta_train, type = "prob")
prediksi_data_train <- predict(model_tree_1, newdata = data_sinta_train, type = "class")
# Confusion matrix with "1" as the positive class
eval_tree_1_train <- caret::confusionMatrix(
  data = prediksi_data_train,
  reference = data_sinta_train$y,
  positive = "1"
)
eval_tree_1_train
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 486 103
## 1 21 77
##
## Accuracy : 0.8195
## 95% CI : (0.7887, 0.8476)
## No Information Rate : 0.738
## P-Value [Acc > NIR] : 2.877e-07
##
## Kappa : 0.4529
##
## Mcnemar's Test P-Value : 3.490e-13
##
## Sensitivity : 0.4278
## Specificity : 0.9586
## Pos Pred Value : 0.7857
## Neg Pred Value : 0.8251
## Prevalence : 0.2620
## Detection Rate : 0.1121
## Detection Prevalence : 0.1426
## Balanced Accuracy : 0.6932
##
## 'Positive' Class : 1
##
# ROC curve on the training data, scored with the second probability column
rocplot(pred = prediksi_prob_data_train[, 2], truth = data_sinta_train$y)
# Predictions on the test data: class probabilities and class labels
prediksi_prob_data_test <- predict(model_tree_1, newdata = data_sinta_test, type = "prob")
prediksi_data_test <- predict(model_tree_1, newdata = data_sinta_test, type = "class")
# Confusion matrix with "1" as the positive class
eval_tree_1 <- caret::confusionMatrix(
  data = prediksi_data_test,
  reference = data_sinta_test$y,
  positive = "1"
)
eval_tree_1
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 200 54
## 1 17 23
##
## Accuracy : 0.7585
## 95% CI : (0.7054, 0.8063)
## No Information Rate : 0.7381
## P-Value [Acc > NIR] : 0.2344
##
## Kappa : 0.2608
##
## Mcnemar's Test P-Value : 1.934e-05
##
## Sensitivity : 0.29870
## Specificity : 0.92166
## Pos Pred Value : 0.57500
## Neg Pred Value : 0.78740
## Prevalence : 0.26190
## Detection Rate : 0.07823
## Detection Prevalence : 0.13605
## Balanced Accuracy : 0.61018
##
## 'Positive' Class : 1
##
# ROC curve on the test data, scored with the second probability column
rocplot(pred = prediksi_prob_data_test[, 2], truth = data_sinta_test$y)
# Variable importance of tree model 1
vip::vip(model_tree_1, num_features = 50)
Model 2
Model dengan hyperparameter
minsplit dan cp yang ditentukan sendiri (minsplit = 10 dan cp = 0)
# Classification tree on the training data with minsplit = 10 and cp = 0
model_tree_2 <- rpart(
  formula = y ~ .,
  data = data_sinta_train,
  method = "class",
  control = rpart.control(cp = 0, minsplit = 10)
)
rpart.plot(model_tree_2)
# Predictions on the training data: class probabilities and class labels
prediksi_prob_data_train <- predict(model_tree_2, newdata = data_sinta_train, type = "prob")
prediksi_data_train <- predict(model_tree_2, newdata = data_sinta_train, type = "class")
# Confusion matrix with "1" as the positive class
eval_tree_2_train <- caret::confusionMatrix(
  data = prediksi_data_train,
  reference = data_sinta_train$y,
  positive = "1"
)
eval_tree_2_train
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 483 97
## 1 24 83
##
## Accuracy : 0.8239
## 95% CI : (0.7933, 0.8516)
## No Information Rate : 0.738
## P-Value [Acc > NIR] : 6.348e-08
##
## Kappa : 0.476
##
## Mcnemar's Test P-Value : 5.931e-11
##
## Sensitivity : 0.4611
## Specificity : 0.9527
## Pos Pred Value : 0.7757
## Neg Pred Value : 0.8328
## Prevalence : 0.2620
## Detection Rate : 0.1208
## Detection Prevalence : 0.1557
## Balanced Accuracy : 0.7069
##
## 'Positive' Class : 1
##
# ROC curve and AUC on the training data (second probability column as score)
ROC_model_tree_2_train <- ROCit::rocit(
  score = prediksi_prob_data_train[, 2],
  class = data_sinta_train$y
)
plot(ROC_model_tree_2_train)
ROC_model_tree_2_train[["AUC"]]
## [1] 0.7900668
# Predictions on the test data: class probabilities and class labels
prediksi_prob_data_test <- predict(model_tree_2, newdata = data_sinta_test, type = "prob")
prediksi_data_test <- predict(model_tree_2, newdata = data_sinta_test, type = "class")
# Confusion matrix with "1" as the positive class
eval_tree_2 <- caret::confusionMatrix(
  data = prediksi_data_test,
  reference = data_sinta_test$y,
  positive = "1"
)
eval_tree_2
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 199 52
## 1 18 25
##
## Accuracy : 0.7619
## 95% CI : (0.709, 0.8094)
## No Information Rate : 0.7381
## P-Value [Acc > NIR] : 0.195
##
## Kappa : 0.2819
##
## Mcnemar's Test P-Value : 8.005e-05
##
## Sensitivity : 0.32468
## Specificity : 0.91705
## Pos Pred Value : 0.58140
## Neg Pred Value : 0.79283
## Prevalence : 0.26190
## Detection Rate : 0.08503
## Detection Prevalence : 0.14626
## Balanced Accuracy : 0.62086
##
## 'Positive' Class : 1
##
# ROC curve and AUC on the test data (second probability column as score)
ROC_model_tree_2 <- ROCit::rocit(
  score = prediksi_prob_data_test[, 2],
  class = data_sinta_test$y
)
plot(ROC_model_tree_2)
ROC_model_tree_2[["AUC"]]
## [1] 0.6928302
# Variable importance of tree model 2
vip::vip(model_tree_2, num_features = 50)
Model 3 Tuning Minsplit
Model dengan hyperparameter
minsplit optimum
# Search for the optimum minsplit by repeated hold-out validation.
#
# Changes from the original loop:
# * The resampled splits live in their own variables. The original overwrote
#   the global data_sinta_train / data_sinta_test, so every model fitted after
#   this loop used a different split from the models fitted before it.
# * Resampling is done inside the training partition only, so the hold-out
#   test set plays no part in choosing the hyperparameter.
# * The result matrix is preallocated instead of being grown with rbind().
set.seed(478)
n_ulangan <- 100
kandidat_minsplit <- 1:30
akurasi.semua <- matrix(
  NA_real_,
  nrow = n_ulangan * length(kandidat_minsplit),
  ncol = 2
)
baris <- 0L
for (ulangan in seq_len(n_ulangan)) {
  acak <- createDataPartition(data_sinta_train$y, p = 0.7, list = FALSE)
  data_tuning_train <- data_sinta_train[acak, ]
  data_tuning_valid <- data_sinta_train[-acak, ]
  for (k in kandidat_minsplit) {
    pohon <- rpart(
      y ~ .,
      data = data_tuning_train,
      method = "class",
      control = rpart.control(minsplit = k, cp = 0)
    )
    # Second column holds the probability of class "1"
    prediksi.prob <- predict(pohon, data_tuning_valid)
    prediksi <- ifelse(prediksi.prob[, 2] > 0.5, "1", "0")
    akurasi <- mean(prediksi == data_tuning_valid$y)
    baris <- baris + 1L
    akurasi.semua[baris, ] <- c(k, akurasi)
  }
}
# Mean validation accuracy per candidate minsplit
mean.akurasi <- tapply(akurasi.semua[, 2], akurasi.semua[, 1], mean)
plot(names(mean.akurasi), mean.akurasi, type = "b", xlab = "minsplit", ylab = "rata-rata akurasi data testing")
# Classification tree with minsplit = 4 and cp = 0
# NOTE(review): the value 4 is hard-coded, presumably read off the tuning
# plot - confirm
model_tree_3 <- rpart(
  formula = y ~ .,
  data = data_sinta_train,
  method = "class",
  control = rpart.control(cp = 0, minsplit = 4)
)
rpart.plot(model_tree_3, extra = 4)
# Predictions on the training data: class probabilities and class labels
prediksi_prob_data_train <- predict(model_tree_3, newdata = data_sinta_train, type = "prob")
prediksi_data_train <- predict(model_tree_3, newdata = data_sinta_train, type = "class")
# Confusion matrix with "1" as the positive class
eval_tree_3_train <- caret::confusionMatrix(
  data = prediksi_data_train,
  reference = data_sinta_train$y,
  positive = "1"
)
eval_tree_3_train
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 484 101
## 1 23 79
##
## Accuracy : 0.8195
## 95% CI : (0.7887, 0.8476)
## No Information Rate : 0.738
## P-Value [Acc > NIR] : 2.877e-07
##
## Kappa : 0.4574
##
## Mcnemar's Test P-Value : 4.685e-12
##
## Sensitivity : 0.4389
## Specificity : 0.9546
## Pos Pred Value : 0.7745
## Neg Pred Value : 0.8274
## Prevalence : 0.2620
## Detection Rate : 0.1150
## Detection Prevalence : 0.1485
## Balanced Accuracy : 0.6968
##
## 'Positive' Class : 1
##
# Predictions on the test data: class probabilities and class labels
prediksi_prob_data_test <- predict(model_tree_3, newdata = data_sinta_test, type = "prob")
prediksi_data_test <- predict(model_tree_3, newdata = data_sinta_test, type = "class")
# Confusion matrix with "1" as the positive class
eval_tree_3 <- caret::confusionMatrix(
  data = prediksi_data_test,
  reference = data_sinta_test$y,
  positive = "1"
)
eval_tree_3
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 200 43
## 1 17 34
##
## Accuracy : 0.7959
## 95% CI : (0.7453, 0.8405)
## No Information Rate : 0.7381
## P-Value [Acc > NIR] : 0.012733
##
## Kappa : 0.4076
##
## Mcnemar's Test P-Value : 0.001249
##
## Sensitivity : 0.4416
## Specificity : 0.9217
## Pos Pred Value : 0.6667
## Neg Pred Value : 0.8230
## Prevalence : 0.2619
## Detection Rate : 0.1156
## Detection Prevalence : 0.1735
## Balanced Accuracy : 0.6816
##
## 'Positive' Class : 1
##
# Variable importance of tree model 3
vip::vip(model_tree_3, num_features = 50)
Model 4 Opsi CP
Model dengan hyperparameter
cp optimum
set.seed(478)
# Grow the tree with cp = 0 and print its complexity-parameter table
model_tree_4 <- rpart(
  formula = y ~ .,
  data = data_sinta_train,
  method = "class",
  control = rpart.control(cp = 0, minsplit = 20)
)
printcp(model_tree_4)
##
## Classification tree:
## rpart(formula = y ~ ., data = data_sinta_train, method = "class",
## control = rpart.control(minsplit = 20, cp = 0))
##
## Variables actually used in tree construction:
## [1] Jumlah_Mahasiswa Rasio_Dosen_per_Mahasiswa
## [3] Rumpun_Ilmu
##
## Root node error: 180/687 = 0.26201
##
## n= 687
##
## CP nsplit rel error xerror xstd
## 1 0.1000000 0 1.00000 1.00000 0.064031
## 2 0.0666667 1 0.90000 1.01667 0.064371
## 3 0.0222222 2 0.83333 0.90556 0.061945
## 4 0.0194444 4 0.78889 0.92222 0.062334
## 5 0.0037037 6 0.75000 0.86111 0.060865
## 6 0.0000000 9 0.73889 0.88889 0.061548
# Refit the tree with the chosen complexity parameter
# NOTE(review): cp = 0.0121212 is hard-coded, presumably picked from the
# complexity table printed for the cp = 0 fit - confirm
model_tree_4 <- rpart(
  formula = y ~ .,
  data = data_sinta_train,
  method = "class",
  control = rpart.control(cp = 0.0121212, minsplit = 20)
)
rpart.plot(model_tree_4)
# Predictions on the training data: class probabilities and class labels
prediksi_prob_data_train <- predict(model_tree_4, newdata = data_sinta_train, type = "prob")
prediksi_data_train <- predict(model_tree_4, newdata = data_sinta_train, type = "class")
# Confusion matrix with "1" as the positive class
eval_tree_4_train <- caret::confusionMatrix(
  data = prediksi_data_train,
  reference = data_sinta_train$y,
  positive = "1"
)
eval_tree_4_train
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 485 113
## 1 22 67
##
## Accuracy : 0.8035
## 95% CI : (0.7718, 0.8326)
## No Information Rate : 0.738
## P-Value [Acc > NIR] : 3.565e-05
##
## Kappa : 0.3929
##
## Mcnemar's Test P-Value : 9.486e-15
##
## Sensitivity : 0.37222
## Specificity : 0.95661
## Pos Pred Value : 0.75281
## Neg Pred Value : 0.81104
## Prevalence : 0.26201
## Detection Rate : 0.09753
## Detection Prevalence : 0.12955
## Balanced Accuracy : 0.66441
##
## 'Positive' Class : 1
##
# ROC curve and AUC on the training data (second probability column as score)
ROC_model_tree_4_train <- ROCit::rocit(
  score = prediksi_prob_data_train[, 2],
  class = data_sinta_train$y
)
plot(ROC_model_tree_4_train)
ROC_model_tree_4_train[["AUC"]]
## [1] 0.6874151
# Predictions on the test data: class probabilities and class labels
prediksi_prob_data_test <- predict(model_tree_4, newdata = data_sinta_test, type = "prob")
prediksi_data_test <- predict(model_tree_4, newdata = data_sinta_test, type = "class")
# Confusion matrix with "1" as the positive class
eval_tree_4 <- caret::confusionMatrix(
  data = prediksi_data_test,
  reference = data_sinta_test$y,
  positive = "1"
)
eval_tree_4
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 197 44
## 1 20 33
##
## Accuracy : 0.7823
## 95% CI : (0.7307, 0.8281)
## No Information Rate : 0.7381
## P-Value [Acc > NIR] : 0.04662
##
## Kappa : 0.374
##
## Mcnemar's Test P-Value : 0.00404
##
## Sensitivity : 0.4286
## Specificity : 0.9078
## Pos Pred Value : 0.6226
## Neg Pred Value : 0.8174
## Prevalence : 0.2619
## Detection Rate : 0.1122
## Detection Prevalence : 0.1803
## Balanced Accuracy : 0.6682
##
## 'Positive' Class : 1
##
# ROC curve and AUC on the test data (second probability column as score)
ROC_model_tree_4 <- ROCit::rocit(
  score = prediksi_prob_data_test[, 2],
  class = data_sinta_test$y
)
plot(ROC_model_tree_4)
ROC_model_tree_4[["AUC"]]
## [1] 0.7061165
# Variable importance of tree model 4
vip::vip(model_tree_4, num_features = 50)
Bagging
Model Default
Model dengan hyperparameter
nbagg default dan tree default
# Bagged classification trees on the training data: 25 bootstrap
# replications, each tree grown with minsplit = 2 and cp = 0.
# coob = TRUE requests the out-of-bag misclassification estimate.
model_bag_1 <- ipred::bagging(
  y ~ .,
  data = data_sinta_train,
  coob = TRUE,
  nbagg = 25,
  control = rpart.control(minsplit = 2, cp = 0)
)
model_bag_1
##
## Bagging classification trees with 25 bootstrap replications
##
## Call: bagging.data.frame(formula = y ~ ., data = data_sinta_train,
## coob = TRUE, nbagg = 25, control = rpart.control(minsplit = 2,
## cp = 0))
##
## Out-of-bag estimate of misclassification error: 0.2242
# Predictions on the training data: class probabilities and class labels
prediksi_prob_data_train <- predict(model_bag_1, newdata = data_sinta_train, type = "prob")
prediksi_data_train <- predict(model_bag_1, newdata = data_sinta_train, type = "class")
# Confusion matrix with "1" as the positive class
eval_model_bag_1_train <- caret::confusionMatrix(
  data = prediksi_data_train,
  reference = data_sinta_train$y,
  positive = "1"
)
eval_model_bag_1_train
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 472 91
## 1 35 89
##
## Accuracy : 0.8166
## 95% CI : (0.7856, 0.8448)
## No Information Rate : 0.738
## P-Value [Acc > NIR] : 7.511e-07
##
## Kappa : 0.4729
##
## Mcnemar's Test P-Value : 9.594e-07
##
## Sensitivity : 0.4944
## Specificity : 0.9310
## Pos Pred Value : 0.7177
## Neg Pred Value : 0.8384
## Prevalence : 0.2620
## Detection Rate : 0.1295
## Detection Prevalence : 0.1805
## Balanced Accuracy : 0.7127
##
## 'Positive' Class : 1
##
# ROC curve and AUC on the training data (second probability column as score)
ROC_model_bag_1_train <- ROCit::rocit(
  score = prediksi_prob_data_train[, 2],
  class = data_sinta_train$y
)
plot(ROC_model_bag_1_train)
ROC_model_bag_1_train[["AUC"]]
## [1] 0.818524
# Predictions on the test data: class probabilities and class labels
prediksi_prob_data_test <- predict(model_bag_1, newdata = data_sinta_test, type = "prob")
prediksi_data_test <- predict(model_bag_1, newdata = data_sinta_test, type = "class")
# Confusion matrix with "1" as the positive class
eval_model_bag_1 <- caret::confusionMatrix(
  data = prediksi_data_test,
  reference = data_sinta_test$y,
  positive = "1"
)
eval_model_bag_1
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 193 37
## 1 24 40
##
## Accuracy : 0.7925
## 95% CI : (0.7416, 0.8374)
## No Information Rate : 0.7381
## P-Value [Acc > NIR] : 0.01811
##
## Kappa : 0.4324
##
## Mcnemar's Test P-Value : 0.12443
##
## Sensitivity : 0.5195
## Specificity : 0.8894
## Pos Pred Value : 0.6250
## Neg Pred Value : 0.8391
## Prevalence : 0.2619
## Detection Rate : 0.1361
## Detection Prevalence : 0.2177
## Balanced Accuracy : 0.7044
##
## 'Positive' Class : 1
##
# ROC curve and AUC on the test data (second probability column as score)
ROC_model_bag_1 <- ROCit::rocit(
  score = prediksi_prob_data_test[, 2],
  class = data_sinta_test$y
)
plot(ROC_model_bag_1)
ROC_model_bag_1[["AUC"]]
## [1] 0.7978335
Random Forest
Model 1 Default
Model dengan hyperparameter
ntree, mtry default
# Random forest of 500 trees on the training data; mtry is left unspecified
model_rf_1 <- randomForest::randomForest(
  y ~ .,
  ntree = 500,
  data = data_sinta_train
)
# Predictions on the training data: class probabilities and class labels
prediksi_prob_data_train <- predict(model_rf_1, newdata = data_sinta_train, type = "prob")
prediksi_data_train <- predict(model_rf_1, newdata = data_sinta_train, type = "class")
# Confusion matrix with "1" as the positive class
eval_model_rf_1_train <- caret::confusionMatrix(
  data = prediksi_data_train,
  reference = data_sinta_train$y,
  positive = "1"
)
eval_model_rf_1_train
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 484 101
## 1 23 79
##
## Accuracy : 0.8195
## 95% CI : (0.7887, 0.8476)
## No Information Rate : 0.738
## P-Value [Acc > NIR] : 2.877e-07
##
## Kappa : 0.4574
##
## Mcnemar's Test P-Value : 4.685e-12
##
## Sensitivity : 0.4389
## Specificity : 0.9546
## Pos Pred Value : 0.7745
## Neg Pred Value : 0.8274
## Prevalence : 0.2620
## Detection Rate : 0.1150
## Detection Prevalence : 0.1485
## Balanced Accuracy : 0.6968
##
## 'Positive' Class : 1
##
# ROC curve and AUC on the training data (second probability column as score)
ROC_model_rf_1_train <- ROCit::rocit(
  score = prediksi_prob_data_train[, 2],
  class = data_sinta_train$y
)
plot(ROC_model_rf_1_train)
ROC_model_rf_1_train[["AUC"]]
## [1] 0.8325115
# Predictions on the test data: class probabilities and class labels
prediksi_prob_data_test <- predict(model_rf_1, newdata = data_sinta_test, type = "prob")
prediksi_data_test <- predict(model_rf_1, newdata = data_sinta_test, type = "class")
# Confusion matrix with "1" as the positive class
eval_model_rf_1 <- caret::confusionMatrix(
  data = prediksi_data_test,
  reference = data_sinta_test$y,
  positive = "1"
)
eval_model_rf_1
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 200 43
## 1 17 34
##
## Accuracy : 0.7959
## 95% CI : (0.7453, 0.8405)
## No Information Rate : 0.7381
## P-Value [Acc > NIR] : 0.012733
##
## Kappa : 0.4076
##
## Mcnemar's Test P-Value : 0.001249
##
## Sensitivity : 0.4416
## Specificity : 0.9217
## Pos Pred Value : 0.6667
## Neg Pred Value : 0.8230
## Prevalence : 0.2619
## Detection Rate : 0.1156
## Detection Prevalence : 0.1735
## Balanced Accuracy : 0.6816
##
## 'Positive' Class : 1
##
# ROC curve and AUC on the test data (second probability column as score)
ROC_model_rf_1 <- ROCit::rocit(
  score = prediksi_prob_data_test[, 2],
  class = data_sinta_test$y
)
plot(ROC_model_rf_1)
ROC_model_rf_1[["AUC"]]
## [1] 0.8048956
# Variable importance of the random forest
vip::vip(model_rf_1, num_features = 50)
Perbandingan Hasil Model
# Collect accuracy, sensitivity and specificity from the test-set confusion
# matrix of every model; one row per model, labelled with the model name.
daftar_eval <- list(
  "RegLog Semua Peubah" = eval_reglog_1,
  "RegLog Seleksi Peubah" = eval_reglog_2,
  "ClassTree 1" = eval_tree_1,
  "ClassTree 2" = eval_tree_2,
  "ClassTree 3" = eval_tree_3,
  "ClassTree 4" = eval_tree_4,
  "Bagging 1" = eval_model_bag_1,
  "RandomForest 1" = eval_model_rf_1
)
hasil_eval <- t(vapply(
  daftar_eval,
  function(ev) {
    c(ev$overall["Accuracy"], ev$byClass[c("Sensitivity", "Specificity")])
  },
  numeric(3)
))
hasil_eval <- as.data.frame(hasil_eval)
# Rank the models from highest to lowest accuracy
dplyr::arrange(.data = hasil_eval, desc(Accuracy))