R04STA1381: Data Analysis Part 1
Library
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.5     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.1.0     v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.1.2
## Warning: package 'tidyr' was built under R version 4.1.2
## Warning: package 'readr' was built under R version 4.1.2
## Warning: package 'purrr' was built under R version 4.1.2
## Warning: package 'stringr' was built under R version 4.1.2
## Warning: package 'forcats' was built under R version 4.1.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.1.3
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
##     group_rows
library(ggplot2)
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.1.3
library(stringr)
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.1.2
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
##     smiths
library(mice)
## Warning: package 'mice' was built under R version 4.1.3
##
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
##
##     filter
## The following objects are masked from 'package:base':
##
##     cbind, rbind
library(nortest)
library(DescTools)
## Warning: package 'DescTools' was built under R version 4.1.2
library(caret)
## Warning: package 'caret' was built under R version 4.1.2
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following objects are masked from 'package:DescTools':
##
##     MAE, RMSE
## The following object is masked from 'package:purrr':
##
##     lift
library(rpart)
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 4.1.3
library(ROCit)
## Warning: package 'ROCit' was built under R version 4.1.3
library(PRROC)
## Warning: package 'PRROC' was built under R version 4.1.3
library(ROCR)
## Warning: package 'ROCR' was built under R version 4.1.3
library(vip)
## Warning: package 'vip' was built under R version 4.1.3
##
## Attaching package: 'vip'
## The following object is masked from 'package:utils':
##
##     vi
library(pillar)
##
## Attaching package: 'pillar'
## The following object is masked from 'package:mice':
##
##     squeeze
## The following object is masked from 'package:dplyr':
##
##     dim_desc
library(readxl)
## Warning: package 'readxl' was built under R version 4.1.3
library(dplyr)
Data
Data Formatting
df_authors_ok$Jenjang <- as.factor(df_authors_ok$Jenjang)
df_authors_ok$Akreditasi <- as.factor(df_authors_ok$Akreditasi)
df_authors_ok$Status <- as.factor(df_authors_ok$Status)
df_authors_ok$keterangan <- as.factor(df_authors_ok$keterangan)
#data structure after formatting
glimpse(df_authors_ok[,c("Jenjang", "Akreditasi", "Status", "keterangan")])
## Rows: 1,882
## Columns: 4
## $ Jenjang    <fct> S3, S2, Sp-1, S3, S3, S3, Sp-1, S3, S2, S2, S2, S3, S1, S1,~
## $ Akreditasi <fct> Belum Terakditasi, Baik Sekali, Baik Sekali, Unggul, Baik S~
## $ Status     <fct> Aktif, Aktif, Aktif, Aktif, Aktif, Aktif, Aktif, Aktif, Akt~
## $ keterangan <fct> Diatas Median, Diatas Median, Diatas Median, Diatas Median,~
Re-Level Factor
levels(df_authors_ok$Jenjang) #original levels
## [1] "D3"      "D4"      "Profesi" "S1"      "S2"      "S3"      "Sp-1"
## [8] "Sp-2"    "Unknown"
df_authors_ok$Jenjang <- factor(df_authors_ok$Jenjang,levels(df_authors_ok$Jenjang)[c(9,1,2,4,3,7,5,8,6)]) #re-level
levels(df_authors_ok$Jenjang) #after re-levelling
## [1] "Unknown" "D3"      "D4"      "S1"      "Profesi" "Sp-1"    "S2"
## [8] "Sp-2"    "S3"
levels(df_authors_ok$Akreditasi)
## [1] "Baik"              "Baik Sekali"       "Belum Terakditasi"
## [4] "NA"                "Unggul"
df_authors_ok$Akreditasi <- factor(df_authors_ok$Akreditasi,levels(df_authors_ok$Akreditasi)[c(4,3,1,2,5)])
levels(df_authors_ok$Akreditasi)
## [1] "NA"                "Belum Terakditasi" "Baik"
## [4] "Baik Sekali"       "Unggul"
Rumpun Ilmu (Field of Science) from the Study Programme (Prodi)
df_authors_ok$Kode_Prodi
##    [1] "45001" "15101" "11725" "13001" "44002" "54001" "12704" "54041" "11106"
## ... (full printout of the 1,882 five-digit programme codes, including "NA" entries, omitted here)
#Build the rumpun ilmu (field of science) lookup from the 5-digit programme codes
df_rumpun <- df_authors_ok %>%
  select(Kode_Prodi,Prodi) %>%
  group_by(Kode_Prodi,Prodi) %>%
  summarize() %>%
  mutate(Kode_Prodi_5Digit = substr(Kode_Prodi,1,5))
## `summarise()` has grouped output by 'Kode_Prodi'. You can override using the `.groups` argument.
view(df_rumpun)
#Assign each programme code to a rumpun ilmu
df_rumpun <- df_rumpun %>%
mutate(Rumpun_Ilmu = case_when(Kode_Prodi_5Digit %in% c(79102,95127,88003,79002,79001,93202,79203,79211,79201,79202,79204,79205,79214,93304,81201,81101,79101,79103) ~ "Ilmu Budaya",
Kode_Prodi_5Digit %in% c(11106,13111,15101,14101,13101,11109,12101,13141,11807,11732,11734,48001,11001,12001,13001,12201,73201,14401,14401,11409,11201,54261,13201,13211,11202,12301,11902,48901,11901,12901,54961,14901,11706,11704,48201,14201,12705,11707,11717,12701,11712,13701,11708,11721,11710,12704,11711,12703,11718,11719,11702,11715,11704,11701,12702,11705,11725,11703,12706,12707,12707,11724,11709,11723,11750,11729,13163,48101) ~ "Kesehatan",
Kode_Prodi_5Digit %in% c(41103,54131,41101,95103,35103,54108,41111,20101,26101,55102,95101,21101,22102,36101,31101,22101,39101,23001,22001,60003,20001,21001,34002,35201,20201,34201,26201,55201,38201,25201,21201,36201,31201,41201,22201,36202,23902,23201,23101) ~ "Teknik",
Kode_Prodi_5Digit %in% c(82101,74101,70101,65101,67101,95130,74102,61107,61106,61101,22106,35101,61102,80101,69101,63001,62001,82001,60001,74001,70001,67001,61001,69001,61201,54245,69201,70201,65201,67201,80201,63201,74235,62901,86207,63201,62201,82201,60201,64201,74201,95105,13161,62101,54142,60101,54142) ~ "Ilmu Sosial dan Politik",
Kode_Prodi_5Digit %in% c(33101,34101,54106,54151,54141,54146,13151,47101,44101,46104,95129,54145,49101,45001,47001,54051,95029,54041,54001,54031,44002,47201,54256,54242,44201,54246,54231,54295,54252,57201,49201,45201,33201,57201,46201,94203,54249,54302,54335,54317,41322,54318,54201,54245,54211,54243,41231,54241,54294,46101,45102,54101,54111) ~ "MIPA",
  ))
#rumpun ilmu data frame to be merged with the original data
df_rumpun_oke <- df_rumpun %>% select(Kode_Prodi,Rumpun_Ilmu)
Analysis
#data structure
glimpse(df_authors_ok)
## Rows: 1,882
## Columns: 30
## $ SINTA_ID <dbl> 6004439, 6690560, 5974526, 5975831, 2188~
## $ Nama <chr> "DAHLANG T", "ANDI NILAWATI USMAN", "MOC~
## $ Universitas <chr> "Universitas Hasanuddin", "Universitas H~
## $ Kode_Prodi <chr> "45001", "15101", "11725", "13001", "440~
## $ Departemen <chr> "S3 - Fisika", "S2 - Ilmu Kebidanan", "S~
## $ Jenjang <fct> S3, S2, Sp-1, S3, S3, S3, Sp-1, S3, S2, ~
## $ Prodi <chr> "Fisika", "Ilmu Kebidanan", "Mikrobiolog~
## $ SINTA_Score_Overall <dbl> 5198, 2535, 10592, 3275, 3115, 3054, 258~
## $ SINTA_Score_3Yr <dbl> 2208, 2086, 2007, 1913, 1878, 1796, 1561~
## $ Affil_Score <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ Affil_Score_3Yr <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ Scopus_Artikel <dbl> 140, 67, 245, 185, 73, 74, 147, 133, 127~
## $ Scopus_Citation <dbl> 1360, 85, 2874, 836, 916, 216, 692, 4017~
## $ Scopus_H_Index <dbl> 20, 4, 31, 17, 21, 10, 13, 40, 12, 16, 1~
## $ GScholar_Artikel <dbl> 215, 132, 603, 422, 116, 86, 237, 240, 1~
## $ GScholar_Citation <dbl> 1726, 255, 5493, 1473, 949, 619, 986, 63~
## $ GScholar_H_Index <dbl> 22, 8, 36, 18, 20, 15, 15, 33, 15, 20, 1~
## $ WOS_Artikel <dbl> 69, 56, 198, 0, 22, 35, 17, 67, 18, 56, ~
## $ WOS_Citation <dbl> 1067, 64, 5268, 0, 127, 58, 20, 3537, 17~
## $ WOS_H_Index <chr> "18", "4", "34", "NA", "7", "5", "3", "2~
## $ Status <fct> Aktif, Aktif, Aktif, Aktif, Aktif, Aktif~
## $ Akreditasi <fct> Belum Terakditasi, Baik Sekali, Baik Sek~
## $ Jumlah_Dosen_Penghitung_Rasio <chr> "1", "22", "8", "15", "11", "33", "14", ~
## $ Jumlah_Dosen_NIDN <chr> "6", "6", "4", "9", "5", "12", "6", "6",~
## $ Jumlah_Dosen_NIDK <chr> "0", "0", "0", "0", "0", "0", "2", "0", ~
## $ Jumlah_Dosen_Total <chr> "6", "6", "4", "9", "5", "12", "8", "6",~
## $ Jumlah_Mahasiswa <chr> "2", "218", "24", "152", "9", "124", "14~
## $ Rasio_Dosen_Per_Mahasiswa <chr> "50", "10.0917431192661", "33.3333333333~
## $ jumlah_artikel <dbl> 424, 255, 1046, 607, 211, 195, 401, 440,~
## $ keterangan <fct> Diatas Median, Diatas Median, Diatas Med~
Data
Unit of observation = authors
y  = SINTA_Score_3Yr, categorized into high and low (the cutoff is checked in the sketch after this list)
x1 = Rumpun Ilmu / field of science (odd semester 2021)
x2 = Level / Jenjang (odd semester 2021)
x3 = Akreditasi / accreditation (odd semester 2021)
x4 = total number of lecturers (odd semester 2021)
x5 = number of students (odd semester 2021)
x6 = lecturer-to-student ratio (odd semester 2021)
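The cutoff of 205 used below to define the high class appears to correspond to the median of SINTA_Score_3Yr (the keterangan factor is labelled "Diatas Median"); this is an assumption, and a minimal sketch to check it is:
# Assumed check: is 205 roughly the median of SINTA_Score_3Yr?
median(df_authors_ok$SINTA_Score_3Yr, na.rm = TRUE)
# share of authors at or above the cutoff (should be close to 50% if so)
mean(df_authors_ok$SINTA_Score_3Yr >= 205, na.rm = TRUE)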
data_1 <- df_authors_ok %>%
left_join(df_rumpun_oke, by="Kode_Prodi") %>%
select(SINTA_Score_3Yr,Prodi,Rumpun_Ilmu,Jenjang,Akreditasi,Jumlah_Dosen_Total, Jumlah_Mahasiswa,jumlah_artikel) %>%
  mutate(y = ifelse(SINTA_Score_3Yr>=205,"1","0")) #class 1: high SINTA_Score_3Yr
data_1$y <- as.factor(data_1$y)
data_1$Rumpun_Ilmu <- as.factor(data_1$Rumpun_Ilmu)
data_1$Jumlah_Dosen_Total <- as.numeric(as.character(data_1$Jumlah_Dosen_Total))
## Warning: NAs introduced by coercion
data_1$Jumlah_Mahasiswa <- as.numeric(as.character(data_1$Jumlah_Mahasiswa))
## Warning: NAs introduced by coercion
data_1$y <- as.factor(data_1$y)
data_1$Rumpun_Ilmu <- as.factor(data_1$Rumpun_Ilmu)
str(data_1)
## tibble [2,012 x 9] (S3: tbl_df/tbl/data.frame)
## $ SINTA_Score_3Yr : num [1:2012] 2208 2086 2007 1913 1878 ...
## $ Prodi : chr [1:2012] "Fisika" "Ilmu Kebidanan" "Mikrobiologi Klinik" "Ilmu Kesehatan Masyarakat" ...
## $ Rumpun_Ilmu : Factor w/ 5 levels "Ilmu Budaya",..: 4 3 3 3 4 4 3 4 3 3 ...
## $ Jenjang : Factor w/ 9 levels "Unknown","D3",..: 9 7 6 9 9 9 6 9 7 7 ...
## $ Akreditasi : Factor w/ 5 levels "NA","Belum Terakditasi",..: 2 4 4 5 4 5 2 4 5 5 ...
## $ Jumlah_Dosen_Total: num [1:2012] 6 6 4 9 5 12 8 6 6 5 ...
## $ Jumlah_Mahasiswa : num [1:2012] 2 218 24 152 9 124 14 56 175 110 ...
## $ jumlah_artikel : num [1:2012] 424 255 1046 607 211 ...
## $ y : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
#Check for missing values
md.pattern(data_1,rotate.names = TRUE)
##      SINTA_Score_3Yr Prodi Jenjang Akreditasi jumlah_artikel y
## 1875 1 1 1 1 1 1
## 32 1 1 1 1 1 1
## 105 1 1 1 1 1 1
## 0 0 0 0 0 0
## Jumlah_Dosen_Total Jumlah_Mahasiswa Rumpun_Ilmu
## 1875 1 1 1 0
## 32 1 1 0 1
## 105 0 0 0 3
## 105 105 137 347
data_1 <- data_1 %>% filter(!is.na(Jumlah_Mahasiswa),!is.na(Jumlah_Dosen_Total),!is.na(Rumpun_Ilmu))
View(data_1) #data to be used
EDA Data
Response Variable (y)
#prepare the data needed for the chart
data_chart <- data_1 %>%
group_by(y) %>%
summarize(value=n()) %>%
mutate(prop = round(value / sum(value) *100, digits = 2))
#pie chart: distribution of authors by SINTA_Score_3Yr category
ggplot(data_chart, aes(x="", y=prop, fill=y)) +
geom_bar(stat="identity", width=1, color="white") +
coord_polar("y", start=0) +
labs(title= "Proporsi Authors Menurut Kategori SINTA_Score_3Yr",
subtitle = "Universitas Hasanuddin") +
  theme_void()
Numeric Predictor Variables (X)
#Density Jumlah_Dosen_Total
ggplot(data_1, aes(x=Jumlah_Dosen_Total)) +
geom_histogram(fill="#69b3a2", color="#e9ecef", alpha=0.8, bins=15)+
theme_light() +
labs(x="Jumlah_Dosen_Total",
y="Density",
title= "Sebaran Jumlah_Dosen_Total",
subtitle = "Universitas Hasanuddin") #Density Jumlah_Mahasiswa
ggplot(data_1, aes(x=Jumlah_Mahasiswa)) +
geom_histogram(fill="#69b3a2", color="#e9ecef", alpha=0.8, bins=15)+
theme_light() +
labs(x="Jumlah_Mahasiswa",
y="Density",
title= "Sebaran Jumlah_Mahasiswa",
subtitle = "Universitas Hasanuddin") #Density Rasio_Dosen_per_Mahasiswa
ggplot(data_1, aes(x=jumlah_artikel)) +
geom_histogram(fill="#69b3a2", color="#e9ecef", alpha=0.8, bins=20)+
theme_light() +
labs(x="Jumlah Artikel",
y="Density",
title= "Sebaran Jumlah Artikel",
subtitle = "Universitas Hasanuddin")
Categorical Predictor Variables (X)
# Akreditasi
data_bar_chart = data_1 %>%
group_by(Akreditasi)%>%
summarize(Jumlah=n())
ggplot(data_bar_chart, aes(x=Akreditasi, y=Jumlah)) +
geom_bar(stat = "identity",color="steelblue") +
theme_light() +
labs(x="",
y="",
title= "",
subtitle = "Universitas Hasanuddin") +
  coord_flip()
# Level
data_bar_chart = data_1 %>%
group_by(Jenjang)%>%
summarize(Jumlah=n())
ggplot(data_bar_chart, aes(x=Jenjang, y=Jumlah)) +
geom_bar(stat = "identity",color="steelblue") +
theme_light() +
labs(x="",
y="",
title= "",
subtitle = "Universitas Hasanuddin") +
  coord_flip()
# Rumpun Ilmu
data_bar_chart = data_1 %>%
group_by(Rumpun_Ilmu)%>%
summarize(Jumlah=n())
ggplot(data_bar_chart, aes(x=(Rumpun_Ilmu), y=Jumlah)) +
geom_bar(stat = "identity",color="steelblue") +
theme_light() +
labs(x="",
y="",
title= "",
subtitle = "Universitas Hasanuddin") +
  coord_flip()
Relationship Between Predictors and the Response
# Akreditasi & y
percentData <- data_1 %>%
group_by(Akreditasi) %>%
count(y) %>%
mutate(ratio=scales::percent(n/sum(n)))
ggplot(data_1,aes(x=factor(Akreditasi),fill=y,))+
geom_bar(position="fill")+
scale_fill_manual(values=c("#7be217", "#4f58ab"))+
geom_text(data=percentData, aes(y=n,label=ratio), color="white",position=position_fill(vjust=0.5))+
labs(
y = "",
x = "Akreditasi",
subtitle = "Unhas",
title = "Proporsi Peubah Respon Menurut Akreditasi")# Level & y
percentData <- data_1 %>%
group_by(Jenjang) %>%
count(y) %>%
mutate(ratio=scales::percent(n/sum(n)))
ggplot(data_1,aes(x=factor(Jenjang),fill=y,))+
geom_bar(position="fill")+
scale_fill_manual(values=c("#7be217", "#4f58ab"))+
geom_text(data=percentData, aes(y=n,label=ratio), color="white",position=position_fill(vjust=0.5))+
labs(
y = "",
x = "Level",
subtitle = "Unhas",
title = "Proporsi Peubah Respon Menurut Level")# Rumpun_Ilmu & y
percentData <- data_1 %>%
group_by(Rumpun_Ilmu) %>%
count(y) %>%
mutate(ratio=scales::percent(n/sum(n)))
ggplot(data_1,aes(x=factor(Rumpun_Ilmu),fill=y,))+
geom_bar(position="fill")+
scale_fill_manual(values=c("#7be217", "#4f58ab"))+
geom_text(data=percentData, aes(y=n,label=ratio), color="white",position=position_fill(vjust=0.5))+
labs(
y = "",
x = "Rumpun_Ilmu",
subtitle = "UNP",
title = "Proporsi Peubah Respon Menurut Rumpun Ilmu")# Jumlah_Mahasiswa & y
#Boxplot by kategori
ggplot(data_1, aes(y=y,x=Jumlah_Mahasiswa,fill=Jumlah_Mahasiswa,alpha=Jumlah_Mahasiswa)) +
geom_boxplot(fill="#69b3a2", alpha=0.8) +
theme_light() +
labs(x="Jumlah_Mahasiswa",
y="y",
title= "Sebaran Jumlah Mahasiswa Menurut Peubah Respon",
subtitle = "Universitas Hasanuddin") # Jumlah_Dosen_Total & y
#Boxplot by kategori
ggplot(data_1, aes(y=y,x=Jumlah_Dosen_Total,fill=Jumlah_Dosen_Total,alpha=Jumlah_Dosen_Total)) +
geom_boxplot(fill="#69b3a2", alpha=0.8) +
theme_light() +
labs(x="Jumlah_Dosen_Total",
y="y",
title= "Sebaran Jumlah Dosen Total Menurut Peubah Respon",
subtitle = "Universitas Hasanuddin") # Rasio_Dosen_per_Mahasiswa & y
#Boxplot by kategori
ggplot(data_1, aes(y=y,x=jumlah_artikel,fill=jumlah_artikel,alpha=jumlah_artikel)) +
geom_boxplot(fill="#69b3a2", alpha=0.8) +
theme_light() +
labs(x="Jumlah_Artikel",
y="y",
title= "Sebaran Jumlah Artikel Menurut Peubah Respon",
subtitle = "Universitas Hasanuddin") Data Model
#data yang akan digunakan untuk model
data_sinta <- data_1 %>% select(-c(SINTA_Score_3Yr,Prodi))
str(data_sinta)
## tibble [1,875 x 7] (S3: tbl_df/tbl/data.frame)
## $ Rumpun_Ilmu : Factor w/ 5 levels "Ilmu Budaya",..: 4 3 3 3 4 4 3 4 3 3 ...
## $ Jenjang : Factor w/ 9 levels "Unknown","D3",..: 9 7 6 9 9 9 6 9 7 7 ...
## $ Akreditasi : Factor w/ 5 levels "NA","Belum Terakditasi",..: 2 4 4 5 4 5 2 4 5 5 ...
## $ Jumlah_Dosen_Total: num [1:1875] 6 6 4 9 5 12 8 6 6 5 ...
## $ Jumlah_Mahasiswa : num [1:1875] 2 218 24 152 9 124 14 56 175 110 ...
## $ jumlah_artikel : num [1:1875] 424 255 1046 607 211 ...
## $ y : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
Splitting Data
set.seed(478)
in.train <- createDataPartition(as.factor(data_sinta$y),p=0.7,list=F) #partition the data
data_sinta_train <- data_sinta[in.train,] #training data for modelling
data_sinta_test <- data_sinta[-in.train,] #testing data for model evaluation
#class proportions of the response variable in each split
round(prop.table(table(data_sinta_train$y)), digits = 4)
##
##      0      1
## 0.7443 0.2557
round(prop.table(table(data_sinta_test$y)), digits = 4)
##
##      0      1
## 0.7451 0.2549
Logistic Regression
All Predictors
model_reglog_1 <- glm(y~., data_sinta_train, family=binomial())
summary(model_reglog_1)
##
## Call:
## glm(formula = y ~ ., family = binomial(), data = data_sinta_train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.5488 -0.5079 -0.3080 0.0120 2.6593
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.4375896 1.2829283 -5.018 5.22e-07 ***
## Rumpun_IlmuIlmu Sosial dan Politik 0.1966512 0.6426449 0.306 0.759602
## Rumpun_IlmuKesehatan 1.7174950 0.6493347 2.645 0.008169 **
## Rumpun_IlmuMIPA 2.1701394 0.6149380 3.529 0.000417 ***
## Rumpun_IlmuTeknik 1.9467967 0.6205277 3.137 0.001705 **
## JenjangD3 2.8227428 1.6993237 1.661 0.096694 .
## JenjangD4 2.6438339 1.2993506 2.035 0.041878 *
## JenjangS1 1.7074280 1.0887359 1.568 0.116819
## JenjangProfesi 1.0543093 1.2711730 0.829 0.406879
## JenjangSp-1 1.4468122 1.1564136 1.251 0.210891
## JenjangS2 2.6475805 1.0909377 2.427 0.015229 *
## JenjangSp-2 3.1776611 1.7940280 1.771 0.076520 .
## JenjangS3 2.7677093 1.1224746 2.466 0.013674 *
## AkreditasiBaik 0.0409923 0.4505642 0.091 0.927509
## AkreditasiBaik Sekali 0.0789481 0.3736678 0.211 0.832670
## AkreditasiUnggul 0.1273243 0.3110881 0.409 0.682329
## Jumlah_Dosen_Total -0.0171818 0.0107885 -1.593 0.111249
## Jumlah_Mahasiswa 0.0007409 0.0003917 1.891 0.058580 .
## jumlah_artikel 0.0278937 0.0021611 12.907 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1494.07 on 1313 degrees of freedom
## Residual deviance: 914.32 on 1295 degrees of freedom
## AIC: 952.32
##
## Number of Fisher Scoring iterations: 6
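The coefficients above are on the log-odds scale. As an optional interpretation aid (not part of the original output), a minimal sketch that converts them to odds ratios with Wald confidence intervals:
# Optional: odds ratios with Wald 95% confidence intervals for the fitted model
exp(cbind(OR = coef(model_reglog_1), confint.default(model_reglog_1)))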
# Prediction on the Training Data
prediksi_prob_data_train <- predict(model_reglog_1, data_sinta_train, type = "response")
prediksi_data_train <- as.factor(ifelse(prediksi_prob_data_train > 0.5,"1","0"))
eval_reglog_1_train <- caret::confusionMatrix(prediksi_data_train, data_sinta_train$y, positive="1")
eval_reglog_1_train
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 926 140
## 1 52 196
##
## Accuracy : 0.8539
## 95% CI : (0.8336, 0.8726)
## No Information Rate : 0.7443
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.58
##
## Mcnemar's Test P-Value : 3.415e-10
##
## Sensitivity : 0.5833
## Specificity : 0.9468
## Pos Pred Value : 0.7903
## Neg Pred Value : 0.8687
## Prevalence : 0.2557
## Detection Rate : 0.1492
## Detection Prevalence : 0.1887
## Balanced Accuracy : 0.7651
##
## 'Positive' Class : 1
##
Sensitivity: the model's ability to correctly predict the positive class
Specificity: the model's ability to correctly predict the negative class
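As a cross-check (a small sketch using the caret object created above, not part of the original analysis), both measures can be recomputed directly from the confusion-matrix counts:
# Recompute sensitivity and specificity from the training confusion matrix
cm <- eval_reglog_1_train$table               # rows = predicted, columns = reference
sens <- cm["1","1"] / sum(cm[,"1"])           # TP / (TP + FN) = 196 / (196 + 140)
spec <- cm["0","0"] / sum(cm[,"0"])           # TN / (TN + FP) = 926 / (926 + 52)
c(sensitivity = sens, specificity = spec)     # should match 0.5833 and 0.9468 above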
# Prediction on the Testing Data
prediksi_prob_data_test <- predict(model_reglog_1, data_sinta_test, type = "response")
prediksi_data_test <- as.factor(ifelse(prediksi_prob_data_test > 0.5,"1","0"))
eval_reglog_1 <- caret::confusionMatrix(prediksi_data_test, data_sinta_test$y, positive="1")
eval_reglog_1
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 389 48
## 1 29 95
##
## Accuracy : 0.8627
## 95% CI : (0.8315, 0.8901)
## No Information Rate : 0.7451
## P-Value [Acc > NIR] : 6.93e-12
##
## Kappa : 0.6221
##
## Mcnemar's Test P-Value : 0.04024
##
## Sensitivity : 0.6643
## Specificity : 0.9306
## Pos Pred Value : 0.7661
## Neg Pred Value : 0.8902
## Prevalence : 0.2549
## Detection Rate : 0.1693
## Detection Prevalence : 0.2210
## Balanced Accuracy : 0.7975
##
## 'Positive' Class : 1
##
Performance on the training and testing data should be compared to detect overfitting or underfitting.
Overfitting occurs when performance on the training data is much higher than on the testing data (the model has learned the training data too closely, including its noise).
Underfitting occurs when performance is poor on both the training and the testing data (the model has not learned the underlying patterns well).
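A minimal sketch (assuming the caret evaluation objects created above) that puts the two accuracies side by side; a large positive gap would point to overfitting:
# Compare training vs testing accuracy for the full logistic regression model
acc_train <- unname(eval_reglog_1_train$overall["Accuracy"])
acc_test  <- unname(eval_reglog_1$overall["Accuracy"])
c(train = acc_train, test = acc_test, gap = acc_train - acc_test)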
#helper function to draw an ROC curve
rocplot=function(pred,truth, ...){
  predob=ROCR::prediction(pred,truth)
  perf=ROCR::performance(predob,"tpr","fpr")
  auc=ROCR::performance(predob,"auc")@y.values
  plot(perf,main = auc)
}
#ROC curve on the training data
rocplot(prediksi_prob_data_train,data_sinta_train$y)
#ROC curve on the testing data
rocplot(prediksi_prob_data_test,data_sinta_test$y)
#variable importance
vip(model_reglog_1, num_features = 50)
Variable Selection
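The reduced model below uses a hand-picked pair of predictors (Jenjang and Jumlah_Mahasiswa). An alternative, sketched here only as a hedged suggestion and not used in the original analysis, is backward selection by AIC with base R's step():
# Hypothetical alternative: backward AIC selection starting from the full model
model_step <- step(model_reglog_1, direction = "backward", trace = FALSE)
formula(model_step)  # predictors retained by AIC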
model_reglog_2 <- glm(y~Jenjang+Jumlah_Mahasiswa , data_sinta, family=binomial())
summary(model_reglog_2)
##
## Call:
## glm(formula = y ~ Jenjang + Jumlah_Mahasiswa, family = binomial(),
## data = data_sinta)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.1283 -0.6862 -0.6486 1.2289 2.4910
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.0284969 0.7239119 -4.184 2.87e-05 ***
## JenjangD3 0.7565670 1.2739870 0.594 0.552607
## JenjangD4 2.2090085 0.8538768 2.587 0.009681 **
## JenjangS1 1.7220577 0.7308360 2.356 0.018459 *
## JenjangProfesi 0.5643365 0.8903418 0.634 0.526184
## JenjangSp-1 1.7218649 0.7541232 2.283 0.022415 *
## JenjangS2 2.6014889 0.7301575 3.563 0.000367 ***
## JenjangSp-2 1.9318995 1.0911585 1.771 0.076643 .
## JenjangS3 2.9123542 0.7376778 3.948 7.88e-05 ***
## Jumlah_Mahasiswa -0.0002643 0.0001638 -1.614 0.106591
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2131.0 on 1874 degrees of freedom
## Residual deviance: 1994.8 on 1865 degrees of freedom
## AIC: 2014.8
##
## Number of Fisher Scoring iterations: 5
# Prediction on the Training Data
prediksi_prob_data_train <- predict(model_reglog_2, data_sinta_train, type = "response")
prediksi_data_train <- as.factor(ifelse(prediksi_prob_data_train > 0.5,"1","0"))
eval_reglog_2_train <- caret::confusionMatrix(prediksi_data_train, data_sinta_train$y, positive="1")
## Warning in confusionMatrix.default(prediksi_data_train, data_sinta_train$y, :
## Levels are not in the same order for reference and data. Refactoring data to
## match.
eval_reglog_2_train
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 978 336
## 1 0 0
##
## Accuracy : 0.7443
## 95% CI : (0.7198, 0.7677)
## No Information Rate : 0.7443
## P-Value [Acc > NIR] : 0.5147
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.0000
## Specificity : 1.0000
## Pos Pred Value : NaN
## Neg Pred Value : 0.7443
## Prevalence : 0.2557
## Detection Rate : 0.0000
## Detection Prevalence : 0.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : 1
##
rocplot(prediksi_prob_data_train,data_sinta_train$y)
# Prediction on the Testing Data
prediksi_prob_data_test <- predict(model_reglog_2, data_sinta_test, type = "response")
prediksi_data_test <- as.factor(ifelse(prediksi_prob_data_test > 0.5,"1","0"))
eval_reglog_2 <- caret::confusionMatrix(prediksi_data_test, data_sinta_test$y, positive="1")
## Warning in confusionMatrix.default(prediksi_data_test, data_sinta_test$y, :
## Levels are not in the same order for reference and data. Refactoring data to
## match.
eval_reglog_2
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 418 143
## 1 0 0
##
## Accuracy : 0.7451
## 95% CI : (0.7069, 0.7807)
## No Information Rate : 0.7451
## P-Value [Acc > NIR] : 0.5225
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.0000
## Specificity : 1.0000
## Pos Pred Value : NaN
## Neg Pred Value : 0.7451
## Prevalence : 0.2549
## Detection Rate : 0.0000
## Detection Prevalence : 0.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : 1
##
rocplot(prediksi_prob_data_test,data_sinta_test$y)
vip(model_reglog_2, num_features = 50)
Classification Tree
Model 1 Default
Model with the default minsplit and cp hyperparameters
model_tree_1 <- rpart(y ~., data = data_sinta_train, method = "class",
control=rpart.control(minsplit = 20, cp=0))
rpart.plot(model_tree_1, extra = 4)
# Prediction on the Training Data
prediksi_prob_data_train <- predict(model_tree_1, data_sinta_train, type = "prob")
prediksi_data_train <- predict(model_tree_1, newdata=data_sinta_train, type = "class")
eval_tree_1_train <- caret::confusionMatrix(prediksi_data_train, data_sinta_train$y, positive="1")
eval_tree_1_train
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 906 81
## 1 72 255
##
## Accuracy : 0.8836
## 95% CI : (0.865, 0.9004)
## No Information Rate : 0.7443
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.6914
##
## Mcnemar's Test P-Value : 0.5178
##
## Sensitivity : 0.7589
## Specificity : 0.9264
## Pos Pred Value : 0.7798
## Neg Pred Value : 0.9179
## Prevalence : 0.2557
## Detection Rate : 0.1941
## Detection Prevalence : 0.2489
## Balanced Accuracy : 0.8427
##
## 'Positive' Class : 1
##
rocplot(prediksi_prob_data_train[,2],data_sinta_train$y)
# Prediction on the Testing Data
prediksi_prob_data_test <- predict(model_tree_1, data_sinta_test, type = "prob")
prediksi_data_test <- predict(model_tree_1, newdata=data_sinta_test, type = "class")
eval_tree_1 <- caret::confusionMatrix(prediksi_data_test, data_sinta_test$y, positive="1")
eval_tree_1
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 369 55
## 1 49 88
##
## Accuracy : 0.8146
## 95% CI : (0.7799, 0.8459)
## No Information Rate : 0.7451
## P-Value [Acc > NIR] : 5.915e-05
##
## Kappa : 0.5051
##
## Mcnemar's Test P-Value : 0.6239
##
## Sensitivity : 0.6154
## Specificity : 0.8828
## Pos Pred Value : 0.6423
## Neg Pred Value : 0.8703
## Prevalence : 0.2549
## Detection Rate : 0.1569
## Detection Prevalence : 0.2442
## Balanced Accuracy : 0.7491
##
## 'Positive' Class : 1
##
rocplot(prediksi_prob_data_test[,2],data_sinta_test$y)
vip(model_tree_1, num_features = 50)
Model 2
Model with user-specified minsplit and cp hyperparameters (minsplit = 10 and cp = 0)
model_tree_2 <- rpart(y ~., data = data_sinta_train, method = "class",
control=rpart.control(minsplit = 10, cp=0))
rpart.plot(model_tree_2)
## Warning: labs do not fit even at cex 0.15, there may be some overplotting
# Prediction on the Training Data
prediksi_prob_data_train <- predict(model_tree_2, data_sinta_train, type = "prob")
prediksi_data_train <- predict(model_tree_2, newdata=data_sinta_train, type = "class")
eval_tree_2_train <- caret::confusionMatrix(prediksi_data_train, data_sinta_train$y, positive="1")
eval_tree_2_train
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 926 59
## 1 52 277
##
## Accuracy : 0.9155
## 95% CI : (0.8992, 0.93)
## No Information Rate : 0.7443
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.7765
##
## Mcnemar's Test P-Value : 0.569
##
## Sensitivity : 0.8244
## Specificity : 0.9468
## Pos Pred Value : 0.8419
## Neg Pred Value : 0.9401
## Prevalence : 0.2557
## Detection Rate : 0.2108
## Detection Prevalence : 0.2504
## Balanced Accuracy : 0.8856
##
## 'Positive' Class : 1
##
ROC_model_tree_2_train <- rocit(score=prediksi_prob_data_train[,2], class=data_sinta_train$y)
plot(ROC_model_tree_2_train)
ROC_model_tree_2_train$AUC
## [1] 0.9640179
# Prediction on the Testing Data
prediksi_prob_data_test <- predict(model_tree_2, data_sinta_test, type = "prob")
prediksi_data_test <- predict(model_tree_2, newdata=data_sinta_test, type = "class")
eval_tree_2 <- caret::confusionMatrix(prediksi_data_test, data_sinta_test$y, positive="1")
eval_tree_2
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 357 54
## 1 61 89
##
## Accuracy : 0.795
## 95% CI : (0.7592, 0.8277)
## No Information Rate : 0.7451
## P-Value [Acc > NIR] : 0.003282
##
## Kappa : 0.4689
##
## Mcnemar's Test P-Value : 0.575819
##
## Sensitivity : 0.6224
## Specificity : 0.8541
## Pos Pred Value : 0.5933
## Neg Pred Value : 0.8686
## Prevalence : 0.2549
## Detection Rate : 0.1586
## Detection Prevalence : 0.2674
## Balanced Accuracy : 0.7382
##
## 'Positive' Class : 1
##
ROC_model_tree_2 <- rocit(score=prediksi_prob_data_test[,2], class=data_sinta_test$y)
plot(ROC_model_tree_2)
ROC_model_tree_2$AUC
## [1] 0.8494914
vip(model_tree_2, num_features = 50)
Model 3 Tuning Minsplit
Model with the optimum minsplit hyperparameter
#search for the optimum minsplit
set.seed(478)
akurasi.semua <- NULL
for(ulangan in 1:100){
acak <- createDataPartition(data_sinta$y, p=0.7, list=FALSE)
data_sinta_train <- data_sinta[acak,]
data_sinta_test <- data_sinta[-acak,]
for (k in 1:30){
pohon <- rpart(y ~ .,
data=data_sinta_train,
method='class',
control=rpart.control(minsplit = k, cp=0))
prediksi.prob <- predict(pohon, data_sinta_test)
prediksi <- ifelse(prediksi.prob > 0.5, "1", "0")[,2]
akurasi <- mean(prediksi == data_sinta_test$y)
akurasi.semua <- rbind(akurasi.semua, c(k, akurasi))
}
}
mean.akurasi <- tapply(akurasi.semua[,2], akurasi.semua[,1], mean)
plot(names(mean.akurasi),mean.akurasi, type="b", xlab="minsplit", ylab="rata-rata akurasi data testing")
model_tree_3 <- rpart(y ~., data = data_sinta_train, method = "class",
                      control=rpart.control(minsplit = 11, cp=0))
rpart.plot(model_tree_3, extra=4)
## Warning: labs do not fit even at cex 0.15, there may be some overplotting
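The value minsplit = 11 above is presumably read off the plot of average testing accuracy; a small hedged sketch to pick it programmatically from the simulation results:
# Hypothetical helper: minsplit with the highest average testing accuracy
best_minsplit <- as.integer(names(which.max(mean.akurasi)))
best_minsplit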
# Prediction on the Training Data
prediksi_prob_data_train <- predict(model_tree_3, data_sinta_train, type = "prob")
prediksi_data_train <- predict(model_tree_3, newdata=data_sinta_train, type = "class")
eval_tree_3_train <- caret::confusionMatrix(prediksi_data_train, data_sinta_train$y, positive="1")
eval_tree_3_train
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 938 61
## 1 40 275
##
## Accuracy : 0.9231
## 95% CI : (0.9074, 0.937)
## No Information Rate : 0.7443
## P-Value [Acc > NIR] : < 2e-16
##
## Kappa : 0.7938
##
## Mcnemar's Test P-Value : 0.04658
##
## Sensitivity : 0.8185
## Specificity : 0.9591
## Pos Pred Value : 0.8730
## Neg Pred Value : 0.9389
## Prevalence : 0.2557
## Detection Rate : 0.2093
## Detection Prevalence : 0.2397
## Balanced Accuracy : 0.8888
##
## 'Positive' Class : 1
##
# Prediction on the Testing Data
prediksi_prob_data_test <- predict(model_tree_3, data_sinta_test, type = "prob")
prediksi_data_test <- predict(model_tree_3, newdata=data_sinta_test, type = "class")
eval_tree_3 <- caret::confusionMatrix(prediksi_data_test, data_sinta_test$y, positive="1")
eval_tree_3
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 369 60
## 1 49 83
##
## Accuracy : 0.8057
## 95% CI : (0.7705, 0.8377)
## No Information Rate : 0.7451
## P-Value [Acc > NIR] : 0.0004311
##
## Kappa : 0.4752
##
## Mcnemar's Test P-Value : 0.3381504
##
## Sensitivity : 0.5804
## Specificity : 0.8828
## Pos Pred Value : 0.6288
## Neg Pred Value : 0.8601
## Prevalence : 0.2549
## Detection Rate : 0.1480
## Detection Prevalence : 0.2353
## Balanced Accuracy : 0.7316
##
## 'Positive' Class : 1
##
vip(model_tree_3, num_features = 50)
Model 4 CP Option
Model with the optimum cp hyperparameter
set.seed(478)
model_tree_4 <- rpart(y ~ ., data=data_sinta_train,
method='class',
control=rpart.control(minsplit = 20, cp=0))
printcp(model_tree_4)
##
## Classification tree:
## rpart(formula = y ~ ., data = data_sinta_train, method = "class",
## control = rpart.control(minsplit = 20, cp = 0))
##
## Variables actually used in tree construction:
## [1] Akreditasi Jenjang jumlah_artikel Jumlah_Dosen_Total
## [5] Jumlah_Mahasiswa Rumpun_Ilmu
##
## Root node error: 336/1314 = 0.25571
##
## n= 1314
##
## CP nsplit rel error xerror xstd
## 1 0.39285714 0 1.00000 1.00000 0.047065
## 2 0.10714286 1 0.60714 0.61607 0.039303
## 3 0.00793651 2 0.50000 0.51488 0.036478
## 4 0.00595238 5 0.47619 0.56250 0.037859
## 5 0.00496032 12 0.43155 0.55655 0.037692
## 6 0.00396825 15 0.41667 0.55655 0.037692
## 7 0.00297619 18 0.40476 0.54762 0.037438
## 8 0.00148810 20 0.39881 0.59226 0.038675
## 9 0.00099206 24 0.39286 0.59226 0.038675
## 10 0.00000000 27 0.38988 0.59821 0.038834
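The refit below uses cp = 0.0066225. One common, hedged way to choose such a value automatically is to take the cp with the smallest cross-validated error (xerror) from the cptable above (a one-standard-error rule is another option):
# Hypothetical selection: cp with the minimum cross-validated error
cp_opt <- model_tree_4$cptable[which.min(model_tree_4$cptable[, "xerror"]), "CP"]
cp_opt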
model_tree_4 <- rpart(y ~ ., data=data_sinta_train,
method='class',
control=rpart.control(minsplit = 20, cp=0.0066225))
rpart.plot(model_tree_4)
# Prediction on the Training Data
prediksi_prob_data_train <- predict(model_tree_4, data_sinta_train, type = "prob")
prediksi_data_train <- predict(model_tree_4, newdata=data_sinta_train, type = "class")
eval_tree_4_train <- caret::confusionMatrix(prediksi_data_train, data_sinta_train$y, positive="1")
eval_tree_4_train
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 894 76
## 1 84 260
##
## Accuracy : 0.8782
## 95% CI : (0.8593, 0.8954)
## No Information Rate : 0.7443
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.6826
##
## Mcnemar's Test P-Value : 0.58
##
## Sensitivity : 0.7738
## Specificity : 0.9141
## Pos Pred Value : 0.7558
## Neg Pred Value : 0.9216
## Prevalence : 0.2557
## Detection Rate : 0.1979
## Detection Prevalence : 0.2618
## Balanced Accuracy : 0.8440
##
## 'Positive' Class : 1
##
ROC_model_tree_4_train <- rocit(score=prediksi_prob_data_train[,2], class=data_sinta_train$y)
plot(ROC_model_tree_4_train)
ROC_model_tree_4_train$AUC
## [1] 0.8952399
# Prediction on the Testing Data
prediksi_prob_data_test <- predict(model_tree_4, data_sinta_test, type = "prob")
prediksi_data_test <- predict(model_tree_4, newdata=data_sinta_test, type = "class")
eval_tree_4 <- caret::confusionMatrix(prediksi_data_test, data_sinta_test$y, positive="1")
eval_tree_4
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 371 45
## 1 47 98
##
## Accuracy : 0.836
## 95% CI : (0.8027, 0.8657)
## No Information Rate : 0.7451
## P-Value [Acc > NIR] : 1.553e-07
##
## Kappa : 0.5703
##
## Mcnemar's Test P-Value : 0.917
##
## Sensitivity : 0.6853
## Specificity : 0.8876
## Pos Pred Value : 0.6759
## Neg Pred Value : 0.8918
## Prevalence : 0.2549
## Detection Rate : 0.1747
## Detection Prevalence : 0.2585
## Balanced Accuracy : 0.7864
##
## 'Positive' Class : 1
##
ROC_model_tree_4 <- rocit(score=prediksi_prob_data_test[,2], class=data_sinta_test$y)
plot(ROC_model_tree_4)
ROC_model_tree_4$AUC
## [1] 0.8122511
vip(model_tree_4, num_features = 50)
Bagging
Model Default
Model with the default nbagg and default tree hyperparameters
model_bag_1 <- ipred::bagging(y ~ ., data=data_sinta_train, coob = TRUE,
nbagg=25,
control= rpart.control(minsplit=2, cp=0))
model_bag_1
##
## Bagging classification trees with 25 bootstrap replications
##
## Call: bagging.data.frame(formula = y ~ ., data = data_sinta_train,
## coob = TRUE, nbagg = 25, control = rpart.control(minsplit = 2,
## cp = 0))
##
## Out-of-bag estimate of misclassification error: 0.1461
# Prediction on the Training Data
prediksi_prob_data_train <- predict(model_bag_1, data_sinta_train, type = "prob")
prediksi_data_train <- predict(model_bag_1, data_sinta_train,type="class")
eval_model_bag_1_train <- caret::confusionMatrix(prediksi_data_train, data_sinta_train$y, positive="1")
eval_model_bag_1_train
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 975 1
## 1 3 335
##
## Accuracy : 0.997
## 95% CI : (0.9922, 0.9992)
## No Information Rate : 0.7443
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.992
##
## Mcnemar's Test P-Value : 0.6171
##
## Sensitivity : 0.9970
## Specificity : 0.9969
## Pos Pred Value : 0.9911
## Neg Pred Value : 0.9990
## Prevalence : 0.2557
## Detection Rate : 0.2549
## Detection Prevalence : 0.2572
## Balanced Accuracy : 0.9970
##
## 'Positive' Class : 1
##
ROC_model_bag_1_train <- rocit(score=prediksi_prob_data_train[,2], class=data_sinta_train$y)
plot(ROC_model_bag_1_train)
ROC_model_bag_1_train$AUC
## [1] 0.9998783
# Prediction on the Testing Data
prediksi_prob_data_test <- predict(model_bag_1, data_sinta_test, type = "prob")
prediksi_data_test <- predict(model_bag_1, data_sinta_test,type="class")
eval_model_bag_1<- caret::confusionMatrix(prediksi_data_test, data_sinta_test$y, positive="1")
eval_model_bag_1
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 368 66
## 1 50 77
##
## Accuracy : 0.7932
## 95% CI : (0.7573, 0.826)
## No Information Rate : 0.7451
## P-Value [Acc > NIR] : 0.00444
##
## Kappa : 0.4348
##
## Mcnemar's Test P-Value : 0.16371
##
## Sensitivity : 0.5385
## Specificity : 0.8804
## Pos Pred Value : 0.6063
## Neg Pred Value : 0.8479
## Prevalence : 0.2549
## Detection Rate : 0.1373
## Detection Prevalence : 0.2264
## Balanced Accuracy : 0.7094
##
## 'Positive' Class : 1
##
ROC_model_bag_1 <- rocit(score=prediksi_prob_data_test[,2], class=data_sinta_test$y)
plot(ROC_model_bag_1)
ROC_model_bag_1$AUC
## [1] 0.8295747
Random Forest
Model 1 Default
Model with the default ntree and mtry hyperparameters
model_rf_1 <- randomForest::randomForest(y ~ ., ntree=500,
                                          data=data_sinta_train)
# Prediction on the Training Data
prediksi_prob_data_train <- predict(model_rf_1, data_sinta_train, type = "prob")
prediksi_data_train <- predict(model_rf_1, data_sinta_train,type="class")
eval_model_rf_1_train <- caret::confusionMatrix(prediksi_data_train, data_sinta_train$y, positive="1")
eval_model_rf_1_train
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 969 21
## 1 9 315
##
## Accuracy : 0.9772
## 95% CI : (0.9676, 0.9845)
## No Information Rate : 0.7443
## P-Value [Acc > NIR] : < 2e-16
##
## Kappa : 0.9393
##
## Mcnemar's Test P-Value : 0.04461
##
## Sensitivity : 0.9375
## Specificity : 0.9908
## Pos Pred Value : 0.9722
## Neg Pred Value : 0.9788
## Prevalence : 0.2557
## Detection Rate : 0.2397
## Detection Prevalence : 0.2466
## Balanced Accuracy : 0.9641
##
## 'Positive' Class : 1
##
ROC_model_rf_1_train <- rocit(score=prediksi_prob_data_train[,2], class=data_sinta_train$y)
plot(ROC_model_rf_1_train)
ROC_model_rf_1_train$AUC
## [1] 0.9977907
# Prediction on the Testing Data
prediksi_prob_data_test <- predict(model_rf_1, data_sinta_test, type = "prob")
prediksi_data_test <- predict(model_rf_1, data_sinta_test,type="class")
eval_model_rf_1<- caret::confusionMatrix(prediksi_data_test, data_sinta_test$y, positive="1")
eval_model_rf_1
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 377 61
## 1 41 82
##
## Accuracy : 0.8182
## 95% CI : (0.7837, 0.8492)
## No Information Rate : 0.7451
## P-Value [Acc > NIR] : 2.471e-05
##
## Kappa : 0.4983
##
## Mcnemar's Test P-Value : 0.05993
##
## Sensitivity : 0.5734
## Specificity : 0.9019
## Pos Pred Value : 0.6667
## Neg Pred Value : 0.8607
## Prevalence : 0.2549
## Detection Rate : 0.1462
## Detection Prevalence : 0.2193
## Balanced Accuracy : 0.7377
##
## 'Positive' Class : 1
##
ROC_model_rf_1 <- rocit(score=prediksi_prob_data_test[,2], class=data_sinta_test$y)
plot(ROC_model_rf_1)
ROC_model_rf_1$AUC
## [1] 0.8612524
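The random forest above keeps the defaults for ntree and mtry. As a hedged follow-up (not part of the original analysis), the out-of-bag error curve can show whether 500 trees are enough:
# Hypothetical diagnostic: OOB error as a function of the number of trees
plot(model_rf_1)            # error rates (OOB and per class) versus number of trees
model_rf_1$err.rate[500, ]  # OOB and class errors after all 500 trees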
vip(model_rf_1, num_features = 50)
Comparison of Model Results
hasil_eval <- rbind(
c(eval_reglog_1$overall[1], eval_reglog_1$byClass[1], eval_reglog_1$byClass[2]),
c(eval_reglog_2$overall[1], eval_reglog_2$byClass[1], eval_reglog_2$byClass[2]),
c(eval_tree_1$overall[1], eval_tree_1$byClass[1], eval_tree_1$byClass[2]),
c(eval_tree_2$overall[1], eval_tree_2$byClass[1], eval_tree_2$byClass[2]),
c(eval_tree_3$overall[1], eval_tree_3$byClass[1], eval_tree_3$byClass[2]),
c(eval_tree_4$overall[1], eval_tree_4$byClass[1], eval_tree_4$byClass[2]),
c(eval_model_bag_1$overall[1], eval_model_bag_1$byClass[1], eval_model_bag_1$byClass[2]),
c(eval_model_rf_1$overall[1], eval_model_rf_1$byClass[1], eval_model_rf_1$byClass[2]))
row.names(hasil_eval) <-
c("RegLog Semua Peubah","RegLog Seleksi Peubah",
"ClassTree 1","ClassTree 2","ClassTree 3","ClassTree 4",
"Bagging 1", "RandomForest 1")
hasil_eval <- as.data.frame(hasil_eval)
dplyr::arrange(.data = hasil_eval, desc(Accuracy))
##                        Accuracy Sensitivity Specificity
## RegLog Semua Peubah 0.8627451 0.6643357 0.9306220
## ClassTree 4 0.8360071 0.6853147 0.8875598
## RandomForest 1 0.8181818 0.5734266 0.9019139
## ClassTree 1 0.8146168 0.6153846 0.8827751
## ClassTree 3 0.8057041 0.5804196 0.8827751
## ClassTree 2 0.7950089 0.6223776 0.8540670
## Bagging 1 0.7932264 0.5384615 0.8803828
## RegLog Seleksi Peubah 0.7450980 0.0000000 1.0000000
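Since kableExtra is already loaded, the same comparison could also be rendered as a formatted table in the knitted document; a minimal sketch:
# Optional presentation of the test-set comparison with kableExtra
hasil_eval %>%
  kbl(digits = 3, caption = "Test-set performance by model") %>%
  kable_styling(full_width = FALSE)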